From 7a95581d661d0f68b5b4ea52ab99ed05aee8d0b9 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Wed, 30 Nov 2022 16:30:46 +0000 Subject: [PATCH 01/99] use inventory_hostname.cluster.tld for hostnames --- .../terraform/inventory.tpl | 2 +- .../{{cookiecutter.environment}}/terraform/nodes.tf | 12 ++++++------ .../terraform/variables.tf | 6 ++++++ 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/environments/skeleton/{{cookiecutter.environment}}/terraform/inventory.tpl b/environments/skeleton/{{cookiecutter.environment}}/terraform/inventory.tpl index ecfecdb6d..32022bbd8 100644 --- a/environments/skeleton/{{cookiecutter.environment}}/terraform/inventory.tpl +++ b/environments/skeleton/{{cookiecutter.environment}}/terraform/inventory.tpl @@ -24,7 +24,7 @@ ${compute.name} ansible_host=${compute.all_fixed_ips[0]} [${cluster_name}_${type_name}] %{~ for node_name, node_type in compute_nodes ~} %{~ if node_type == type_name ~} -${cluster_name}-${node_name} +${computes[node_name].name} %{~ endif ~} %{~ endfor ~} %{ endfor ~} diff --git a/environments/skeleton/{{cookiecutter.environment}}/terraform/nodes.tf b/environments/skeleton/{{cookiecutter.environment}}/terraform/nodes.tf index e91babd94..17b9d5a5b 100644 --- a/environments/skeleton/{{cookiecutter.environment}}/terraform/nodes.tf +++ b/environments/skeleton/{{cookiecutter.environment}}/terraform/nodes.tf @@ -11,7 +11,7 @@ resource "openstack_networking_port_v2" "login" { for_each = toset(keys(var.login_nodes)) - name = "${var.cluster_name}-${each.key}" + name = "${each.key}.${var.cluster_name}.${var.tld}" network_id = data.openstack_networking_network_v2.cluster_net.id admin_state_up = "true" @@ -29,7 +29,7 @@ resource "openstack_networking_port_v2" "login" { resource "openstack_networking_port_v2" "control" { - name = "${var.cluster_name}-control" + name = "control.${var.cluster_name}.${var.tld}" network_id = data.openstack_networking_network_v2.cluster_net.id admin_state_up = "true" @@ -49,7 +49,7 @@ 
resource "openstack_networking_port_v2" "compute" { for_each = toset(keys(var.compute_nodes)) - name = "${var.cluster_name}-${each.key}" + name = "${each.key}.${var.cluster_name}.${var.tld}" network_id = data.openstack_networking_network_v2.cluster_net.id admin_state_up = "true" @@ -70,7 +70,7 @@ resource "openstack_compute_instance_v2" "control" { for_each = var.create_nodes ? toset(["control"]) : toset([]) - name = "${var.cluster_name}-${each.key}" + name = "control.${var.cluster_name}.${var.tld}" image_name = data.openstack_images_image_v2.control.name flavor_name = var.control_node.flavor key_pair = var.key_pair @@ -138,7 +138,7 @@ resource "openstack_compute_instance_v2" "login" { for_each = var.create_nodes ? var.login_nodes : {} - name = "${var.cluster_name}-${each.key}" + name = "${each.key}.${var.cluster_name}.${var.tld}" image_name = each.value.image flavor_name = each.value.flavor key_pair = var.key_pair @@ -164,7 +164,7 @@ resource "openstack_compute_instance_v2" "compute" { for_each = var.create_nodes ? 
var.compute_nodes : {} - name = "${var.cluster_name}-${each.key}" + name = "${each.key}.${var.cluster_name}.${var.tld}" image_name = lookup(var.compute_images, each.key, var.compute_types[each.value].image) flavor_name = var.compute_types[each.value].flavor key_pair = var.key_pair diff --git a/environments/skeleton/{{cookiecutter.environment}}/terraform/variables.tf b/environments/skeleton/{{cookiecutter.environment}}/terraform/variables.tf index b1673346b..03a31f5b0 100644 --- a/environments/skeleton/{{cookiecutter.environment}}/terraform/variables.tf +++ b/environments/skeleton/{{cookiecutter.environment}}/terraform/variables.tf @@ -3,6 +3,12 @@ variable "cluster_name" { description = "Name for cluster, used as prefix for resources" } +variable "tld" { + type = string + description = "Top level domain name" + default = "invalid" +} + variable "cluster_net" { type = string description = "Name of existing cluster network" From 86dd61b38c6ee2611d412b0fdc38ce6286448d01 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Wed, 30 Nov 2022 16:32:29 +0000 Subject: [PATCH 02/99] disable etc_hosts in arcus --- environments/arcus/inventory/extra_groups | 3 --- 1 file changed, 3 deletions(-) diff --git a/environments/arcus/inventory/extra_groups b/environments/arcus/inventory/extra_groups index cc87628e7..746a16d97 100644 --- a/environments/arcus/inventory/extra_groups +++ b/environments/arcus/inventory/extra_groups @@ -4,6 +4,3 @@ cluster [rebuild:children] control compute - -[etc_hosts:children] -cluster From 03f0381aa17c7ef8ddf0c88e21f588eab9ec8d5f Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Wed, 30 Nov 2022 16:34:24 +0000 Subject: [PATCH 03/99] get freeipa-server working with ssh tunneled web GUI --- ansible/.gitignore | 4 +- ansible/iam.yml | 10 +++ ansible/roles/freeipa/defaults/main.yml | 5 ++ ansible/roles/freeipa/tasks/server.yml | 64 +++++++++++++++++++ ansible/roles/passwords/defaults/main.yml | 2 + .../inventory/group_vars/freeipa_server.yml | 3 + 
environments/common/inventory/groups | 3 + environments/common/layouts/everything | 4 ++ 8 files changed, 94 insertions(+), 1 deletion(-) create mode 100644 ansible/roles/freeipa/defaults/main.yml create mode 100644 ansible/roles/freeipa/tasks/server.yml create mode 100644 environments/common/inventory/group_vars/freeipa_server.yml diff --git a/ansible/.gitignore b/ansible/.gitignore index bf09f0468..8301a5cea 100644 --- a/ansible/.gitignore +++ b/ansible/.gitignore @@ -33,4 +33,6 @@ roles/* !roles/mysql/ !roles/mysql/** !roles/systemd/ -!roles/systemd/** \ No newline at end of file +!roles/systemd/** +!roles/freeipa/ +!roles/freeipa/** diff --git a/ansible/iam.yml b/ansible/iam.yml index 266bca1ab..981a7ef26 100644 --- a/ansible/iam.yml +++ b/ansible/iam.yml @@ -1,3 +1,13 @@ +- hosts: freeipa_server + tags: + - freeipa_server + gather_facts: yes + tasks: + - import_role: + name: freeipa + tasks_from: server.yml + become: yes + - hosts: basic_users become: yes tags: diff --git a/ansible/roles/freeipa/defaults/main.yml b/ansible/roles/freeipa/defaults/main.yml new file mode 100644 index 000000000..cee8421a2 --- /dev/null +++ b/ansible/roles/freeipa/defaults/main.yml @@ -0,0 +1,5 @@ +#freeipa_realm: +freeipa_domain: "{{ freeipa_realm | lower }}" +#freeipa_ds_password: +#freeipa_admin_password: +freeipa_server_ip: "{{ ansible_default_ipv4.address }}" diff --git a/ansible/roles/freeipa/tasks/server.yml b/ansible/roles/freeipa/tasks/server.yml new file mode 100644 index 000000000..f743cab5e --- /dev/null +++ b/ansible/roles/freeipa/tasks/server.yml @@ -0,0 +1,64 @@ +# Based on https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/8/html/installing_identity_management/preparing-the-system-for-ipa-server-installation_installing-identity-management#host-name-and-dns-requirements-for-ipa_preparing-the-system-for-ipa-server-installation + +- name: Install freeipa server packages + dnf: + name: '@idm:DL1/dns' + state: present + +# TODO: set file mask but 
that's a bit horrendous? +# TODO: verify etc_hosts is empty? +# TODO: verify hostname is fully-qualified +- name: Install ipa server +# TODO: make no-ui-redirect configurable?? + command: + cmd: > + ipa-server-install + --realm {{ freeipa_realm | quote }} + --domain {{ freeipa_domain | lower | quote }} + --ds-password {{ freeipa_ds_password | quote }} + --admin-password {{ freeipa_admin_password | quote }} + --ip-address={{ freeipa_server_ip }} + --setup-dns + --auto-forwarders + --no-dnssec-validation + --no-ntp + --unattended + --no-ui-redirect + + register: _ipa_server_install + changed_when: _ipa_server_install.rc == 0 + failed_when: > + (_ipa_server_install.rc != 0) and + ('IPA server is already configured' not in _ipa_server_install.stderr) + +- name: Disable redirects to hard-coded domain + # see https://pagure.io/freeipa/issue/7479 + replace: path=/etc/httpd/conf.d/ipa-rewrite.conf regexp='{{ item.regexp }}' replace='{{ item.replace }}' + with_items: + # RewriteRule ^/$ https://${FQDN}/ipa/ui [L,NC,R=301] - irrelevant if using --no-ui-redirect + - regexp: '^(RewriteRule \^/\$) (https://.*)(/ipa/ui.*)$' + replace: '\1 \3' + # RewriteRule ^/ipa/(.*) - occurs twice + - regexp: '^(RewriteRule \^\/ipa\/\(.*)$' + replace: '#\1' + - regexp: '^(RewriteCond .*)$' + replace: '#\1' + # RewriteRule ^/(.*) https://${FQDN}/$1 [L,R=301] + - regexp: '^(RewriteRule \^/\(\.\*\).*)$' + replace: '#\1' + register: _replace_freeipa_rewrites + +- name: Deactivate HTTP RefererError + replace: + path: '/usr/lib/python3.6/site-packages/ipaserver/rpcserver.py' + regexp: '{{ item }}' + replace: '\1pass # \2' + with_items: + - "^([ ]*)(return self.marshal\\(result, RefererError\\(referer)" + register: _replace_rpcserver_referrer + +- name: Reload apache configuration + service: + name: httpd + state: reloaded + when: _replace_freeipa_rewrites.changed or _replace_rpcserver_referrer.changed diff --git a/ansible/roles/passwords/defaults/main.yml b/ansible/roles/passwords/defaults/main.yml 
index f528491f2..55680ae37 100644 --- a/ansible/roles/passwords/defaults/main.yml +++ b/ansible/roles/passwords/defaults/main.yml @@ -6,6 +6,8 @@ slurm_appliance_secrets: vault_mysql_root_password: "{{ secrets_openhpc_mysql_root_password | default(vault_mysql_root_password | default(lookup('password', '/dev/null'))) }}" vault_mysql_slurm_password: "{{ secrets_openhpc_mysql_slurm_password | default(vault_mysql_slurm_password | default(lookup('password', '/dev/null'))) }}" vault_openhpc_mungekey: "{{ secrets_openhpc_mungekey | default(vault_openhpc_mungekey | default(secrets_openhpc_mungekey_default)) }}" + vault_freeipa_ds_password: "{{ vault_freeipa_ds_password | default(lookup('password', '/dev/null')) }}" + vault_freeipa_admin_password: "{{ vault_freeipa_admin_password | default(lookup('password', '/dev/null')) }}" secrets_openhpc_mungekey_default: content: "{{ lookup('pipe', 'dd if=/dev/urandom bs=1 count=1024 2>/dev/null | base64') }}" diff --git a/environments/common/inventory/group_vars/freeipa_server.yml b/environments/common/inventory/group_vars/freeipa_server.yml new file mode 100644 index 000000000..8cba5acf9 --- /dev/null +++ b/environments/common/inventory/group_vars/freeipa_server.yml @@ -0,0 +1,3 @@ +freeipa_realm: "{{ openhpc_cluster_name | upper }}.INVALID" +freeipa_ds_password: "{{ vault_freeipa_ds_password }}" +freeipa_admin_password: "{{ vault_freeipa_admin_password }}" diff --git a/environments/common/inventory/groups b/environments/common/inventory/groups index 98be33c66..867577ac4 100644 --- a/environments/common/inventory/groups +++ b/environments/common/inventory/groups @@ -104,3 +104,6 @@ opendistro grafana control prometheus + +[freeipa_server] +# Hosts to be a FreeIPA server. 
See ansible/roles/freeipa/README.md diff --git a/environments/common/layouts/everything b/environments/common/layouts/everything index 6e2538cc4..2725a8430 100644 --- a/environments/common/layouts/everything +++ b/environments/common/layouts/everything @@ -54,3 +54,7 @@ compute [etc_hosts] # Hosts to manage /etc/hosts e.g. if no internal DNS. See ansible/roles/etc_hosts/README.md + +[freeipa_server:children] +# Hosts to be a FreeIPA server. See ansible/roles/freeipa/README.md +control From f353472e33337dd5ed5559f6202f045322c55a6b Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Thu, 1 Dec 2022 14:26:43 +0000 Subject: [PATCH 04/99] add freeipa hosts and enrole them --- ansible/iam.yml | 16 ++++++++ ansible/roles/freeipa/defaults/main.yml | 4 +- ansible/roles/freeipa/tasks/addhost.yml | 18 +++++++++ ansible/roles/freeipa/tasks/client.yml | 39 +++++++++++++++++++ .../{freeipa_server.yml => freeipa.yml} | 1 + environments/common/inventory/groups | 8 ++++ environments/common/layouts/everything | 4 ++ 7 files changed, 89 insertions(+), 1 deletion(-) create mode 100644 ansible/roles/freeipa/tasks/addhost.yml create mode 100644 ansible/roles/freeipa/tasks/client.yml rename environments/common/inventory/group_vars/{freeipa_server.yml => freeipa.yml} (55%) diff --git a/ansible/iam.yml b/ansible/iam.yml index 981a7ef26..e8fd93437 100644 --- a/ansible/iam.yml +++ b/ansible/iam.yml @@ -1,5 +1,6 @@ - hosts: freeipa_server tags: + - freeipa - freeipa_server gather_facts: yes tasks: @@ -8,6 +9,21 @@ tasks_from: server.yml become: yes +- hosts: freeipa_client + tags: + - freeipa + - freeipa_client + gather_facts: yes + tasks: + - import_role: + name: freeipa + tasks_from: addhost.yml + become: yes + - import_role: + name: freeipa + tasks_from: client.yml + become: yes + - hosts: basic_users become: yes tags: diff --git a/ansible/roles/freeipa/defaults/main.yml b/ansible/roles/freeipa/defaults/main.yml index cee8421a2..e7656cc75 100644 --- a/ansible/roles/freeipa/defaults/main.yml 
+++ b/ansible/roles/freeipa/defaults/main.yml @@ -2,4 +2,6 @@ freeipa_domain: "{{ freeipa_realm | lower }}" #freeipa_ds_password: #freeipa_admin_password: -freeipa_server_ip: "{{ ansible_default_ipv4.address }}" +#freeipa_server_ip: +freeipa_client_ip: "{{ ansible_host }}" # when run on freeipa_client group! +# freeipa_client_randompassword: # auto set diff --git a/ansible/roles/freeipa/tasks/addhost.yml b/ansible/roles/freeipa/tasks/addhost.yml new file mode 100644 index 000000000..94636a895 --- /dev/null +++ b/ansible/roles/freeipa/tasks/addhost.yml @@ -0,0 +1,18 @@ +- name: Add host to IPA + # annoyingly this always shows as changed! + community.general.ipa_host: + name: "{{ inventory_hostname }}" + ip_address: "{{ freeipa_client_ip }}" + ipa_host: "{{ groups['freeipa_server'].0 }}" + ipa_pass: "{{ vault_freeipa_admin_password }}" + ipa_user: admin + random_password: true + state: present + validate_certs: false + delegate_to: "{{ groups['freeipa_server'].0 }}" + register: _ipa_host_add +- debug: + var: _ipa_host_add + +- set_fact: + freeipa_client_randompassword: "{{ _ipa_host_add.host.randompassword }}" # could use ipa host-mod --random to set a new one, if necessary diff --git a/ansible/roles/freeipa/tasks/client.yml b/ansible/roles/freeipa/tasks/client.yml new file mode 100644 index 000000000..5c8fdc8b9 --- /dev/null +++ b/ansible/roles/freeipa/tasks/client.yml @@ -0,0 +1,39 @@ +# based on https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/8/html/installing_identity_management/assembly_installing-an-idm-client_installing-identity-management + +- name: Install FreeIPA client package + dnf: + name: ipa-client + +- name: Set IPA server as nameserver + copy: + dest: /etc/resolv.conf + content: | + ; Created by slurm appliance/ansible/roles/freeipa/tasks/client.yml + ; + + nameserver {{ freeipa_server_ip }} + nameserver 131.111.12.20 + owner: root + group: root + mode: u=rw,og=r + +- name: Enrole with FreeIPA + # reenrolment requires 
--force-join and --password or --keytab + # renrolement means: + # 1. A new host certificate is issued + # 2. The old host certificate is revoked + # 3. New SSH keys are generated + # 4. ipaUniqueID is preserved + # --password is overloaded - its bulkpassword unless --prinicpal is used in which case it's admin password + command: > + ipa-client-install + --unattended + --mkhomedir + --force-join + --principal admin + --password {{ freeipa_admin_password | quote }} + register: ipa_client_install + changed_when: ipa_client_install.rc == 0 + failed_when: > + ipa_client_install.rc != 0 and + 'IPA client is already configured' not in ipa_client_install.stderr diff --git a/environments/common/inventory/group_vars/freeipa_server.yml b/environments/common/inventory/group_vars/freeipa.yml similarity index 55% rename from environments/common/inventory/group_vars/freeipa_server.yml rename to environments/common/inventory/group_vars/freeipa.yml index 8cba5acf9..31212b2a7 100644 --- a/environments/common/inventory/group_vars/freeipa_server.yml +++ b/environments/common/inventory/group_vars/freeipa.yml @@ -1,3 +1,4 @@ freeipa_realm: "{{ openhpc_cluster_name | upper }}.INVALID" freeipa_ds_password: "{{ vault_freeipa_ds_password }}" freeipa_admin_password: "{{ vault_freeipa_admin_password }}" +freeipa_server_ip: "{{ hostvars[groups['freeipa_server'].0].ansible_host }}" # not using ansible_default_ipv4.address as that requires facts diff --git a/environments/common/inventory/groups b/environments/common/inventory/groups index 867577ac4..0c15a694c 100644 --- a/environments/common/inventory/groups +++ b/environments/common/inventory/groups @@ -107,3 +107,11 @@ prometheus [freeipa_server] # Hosts to be a FreeIPA server. See ansible/roles/freeipa/README.md + +[freeipa_client] +# Hosts to be a FreeIPA client. 
See ansible/roles/freeipa/README.md + +[freeipa:children] +# Allows defining variables common to freeipa_server and _client +freeipa_server +freeipa_client \ No newline at end of file diff --git a/environments/common/layouts/everything b/environments/common/layouts/everything index 2725a8430..66ba46c58 100644 --- a/environments/common/layouts/everything +++ b/environments/common/layouts/everything @@ -58,3 +58,7 @@ compute [freeipa_server:children] # Hosts to be a FreeIPA server. See ansible/roles/freeipa/README.md control + +[freeipa_client:children] +login +compute From a34e42dc40534ed3825f1939f5b2e2c391ddae5a Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Fri, 2 Dec 2022 09:13:53 +0000 Subject: [PATCH 05/99] don't use basic_users for IAM on arcus --- environments/arcus/inventory/extra_groups | 3 --- .../arcus/inventory/group_vars/all/basic_users.yml | 7 ------- .../arcus/inventory/group_vars/basic_users/overrides.yml | 6 ------ 3 files changed, 16 deletions(-) delete mode 100644 environments/arcus/inventory/group_vars/all/basic_users.yml delete mode 100644 environments/arcus/inventory/group_vars/basic_users/overrides.yml diff --git a/environments/arcus/inventory/extra_groups b/environments/arcus/inventory/extra_groups index 746a16d97..f9ac21530 100644 --- a/environments/arcus/inventory/extra_groups +++ b/environments/arcus/inventory/extra_groups @@ -1,6 +1,3 @@ -[basic_users:children] -cluster - [rebuild:children] control compute diff --git a/environments/arcus/inventory/group_vars/all/basic_users.yml b/environments/arcus/inventory/group_vars/all/basic_users.yml deleted file mode 100644 index 2f90a1d60..000000000 --- a/environments/arcus/inventory/group_vars/all/basic_users.yml +++ /dev/null @@ -1,7 +0,0 @@ -# has to be defined on 'all' group so localhost can template out for cloud-init -testuser_password: "{{ lookup('env', 'TESTUSER_PASSWORD') | default(vault_testuser_password, true) }}" - -basic_users_users: - - name: testuser # can't use rocky as $HOME 
isn't shared! - password: "{{ testuser_password | password_hash('sha512', 65534 | random(seed=inventory_hostname) | string) }}" # idempotent - uid: 1005 diff --git a/environments/arcus/inventory/group_vars/basic_users/overrides.yml b/environments/arcus/inventory/group_vars/basic_users/overrides.yml deleted file mode 100644 index ae416cf72..000000000 --- a/environments/arcus/inventory/group_vars/basic_users/overrides.yml +++ /dev/null @@ -1,6 +0,0 @@ -test_user_password: "{{ lookup('env', 'TESTUSER_PASSWORD') | default(vault_testuser_password, true) }}" # CI uses env, debug can set vault_testuser_password - -basic_users_users: - - name: testuser # can't use rocky as $HOME isn't shared! - password: "{{ test_user_password | password_hash('sha512', 65534 | random(seed=inventory_hostname) | string) }}" # idempotent - uid: 1005 From 9407ee9e6e9d3f9d473a4e9ec2464082c3880d1f Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Fri, 2 Dec 2022 09:26:54 +0000 Subject: [PATCH 06/99] add freeipa user handling --- ansible/iam.yml | 5 +++++ ansible/roles/freeipa/README.md | 10 ++++++++++ ansible/roles/freeipa/defaults/main.yml | 4 ++++ ansible/roles/freeipa/tasks/users.yml | 21 +++++++++++++++++++++ 4 files changed, 40 insertions(+) create mode 100644 ansible/roles/freeipa/README.md create mode 100644 ansible/roles/freeipa/tasks/users.yml diff --git a/ansible/iam.yml b/ansible/iam.yml index e8fd93437..423f0b740 100644 --- a/ansible/iam.yml +++ b/ansible/iam.yml @@ -8,6 +8,11 @@ name: freeipa tasks_from: server.yml become: yes + - import_role: + name: freeipa + tasks_from: users.yml + tags: users + become: yes - hosts: freeipa_client tags: diff --git a/ansible/roles/freeipa/README.md b/ansible/roles/freeipa/README.md new file mode 100644 index 000000000..bac686ec9 --- /dev/null +++ b/ansible/roles/freeipa/README.md @@ -0,0 +1,10 @@ + + + +# Users + +The dicts in `freeipa_users` take any parameters for the 
[community.general.ipa_user](https://docs.ansible.com/ansible/latest/collections/community/general/ipa_user_module.html#ansible-collections-community-general-ipa-user-module). Note that: + - Parameters `name`, `givenname` (firstname) and `sn` (surname) are required. + - Parameters `ipa_pass` and `ipa_user` are automatically set by the role. + - The uid and gid are automatically set by FreeIPA. + - If `password` is set, the value should *not* be a hash (unlike `ansible.builtin.user` as used by the `basic_users` role), and it must be changed on first login unless `krbpasswordexpiration` is set to some future date. diff --git a/ansible/roles/freeipa/defaults/main.yml b/ansible/roles/freeipa/defaults/main.yml index e7656cc75..342ef2496 100644 --- a/ansible/roles/freeipa/defaults/main.yml +++ b/ansible/roles/freeipa/defaults/main.yml @@ -5,3 +5,7 @@ freeipa_domain: "{{ freeipa_realm | lower }}" #freeipa_server_ip: freeipa_client_ip: "{{ ansible_host }}" # when run on freeipa_client group! # freeipa_client_randompassword: # auto set +freeipa_user_defaults: + ipa_pass: "{{ freeipa_admin_password | quote }}" + ipa_user: admin +freeipa_users: [] # see community.general.ipa_user diff --git a/ansible/roles/freeipa/tasks/users.yml b/ansible/roles/freeipa/tasks/users.yml new file mode 100644 index 000000000..0405b10dd --- /dev/null +++ b/ansible/roles/freeipa/tasks/users.yml @@ -0,0 +1,21 @@ +- name: Add users to freeipa + # this falls back to the keys from ansible.builtin.user, where they're equivalent + community.general.ipa_user: "{{ freeipa_user_defaults | combine(item) }}" + # name: "{{ item.name }}" + # password: "{{ item.password }}" + # # update_password: always # default + # # uidnumber: "{{ item.uidnumber | default(item.uid) }}" + # # gidnumber: "{{ item.gidnumber | default(item.uidnumber) | default(item.uid) }}" + # givenname: "{{ item.givenname }}" + # sn: "{{ item.sn }}" + # # sshpubkey: "{{ item.sshpubkey | default(item.public_key) | default(omit) }}" + # # have 
to set this if you don't want an immediately-expired password! + # # krbpasswordexpiration: >- + # # {{ lookup('pipe', 'date --date "1 year" +%Y%m%d%H%M%S') }} + # # ipa_host: not needed, using DNS + # ipa_pass: "{{ freeipa_admin_password | quote }}" + # ipa_user: admin + + # state: present + # userauthtype: password + loop: "{{ freeipa_users }}" From 26d0d7fbfc66ede419be648c722d3ecdf87bdf51 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Fri, 2 Dec 2022 09:28:03 +0000 Subject: [PATCH 07/99] use freeipa for arcus/ci users --- environments/arcus/inventory/group_vars/all/freeipa.yml | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 environments/arcus/inventory/group_vars/all/freeipa.yml diff --git a/environments/arcus/inventory/group_vars/all/freeipa.yml b/environments/arcus/inventory/group_vars/all/freeipa.yml new file mode 100644 index 000000000..47f59cfb8 --- /dev/null +++ b/environments/arcus/inventory/group_vars/all/freeipa.yml @@ -0,0 +1,6 @@ +freeipa_users: + - name: testuser # can't use rocky as $HOME isn't shared! 
+ password: "{{ vault_testuser_password }}" + givenname: test + sn: test + krbpasswordexpiration: "{{ lookup('pipe', 'date --date \"1 day\" +%Y%m%d%H%M%S') }}" # password will work for 24hrs From e5d501acb1c520e60a2997dbf7a5a23875c1f8a1 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Fri, 2 Dec 2022 11:00:44 +0000 Subject: [PATCH 08/99] add validation for freeipa --- ansible/roles/freeipa/tasks/server.yml | 6 ++--- ansible/roles/freeipa/tasks/users.yml | 17 -------------- ansible/roles/freeipa/tasks/validate.yml | 29 ++++++++++++++++++++++++ ansible/validate.yml | 8 +++++++ 4 files changed, 39 insertions(+), 21 deletions(-) create mode 100644 ansible/roles/freeipa/tasks/validate.yml diff --git a/ansible/roles/freeipa/tasks/server.yml b/ansible/roles/freeipa/tasks/server.yml index f743cab5e..74003c565 100644 --- a/ansible/roles/freeipa/tasks/server.yml +++ b/ansible/roles/freeipa/tasks/server.yml @@ -5,11 +5,9 @@ name: '@idm:DL1/dns' state: present -# TODO: set file mask but that's a bit horrendous? -# TODO: verify etc_hosts is empty? -# TODO: verify hostname is fully-qualified - name: Install ipa server -# TODO: make no-ui-redirect configurable?? +# TODO: make no-ui-redirect and dns configurable?? +# TODO: set file mask as per docs? Would be hard to cope with failures. Doesn't appear to be necessary actually. 
command: cmd: > ipa-server-install diff --git a/ansible/roles/freeipa/tasks/users.yml b/ansible/roles/freeipa/tasks/users.yml index 0405b10dd..d35d2ab9f 100644 --- a/ansible/roles/freeipa/tasks/users.yml +++ b/ansible/roles/freeipa/tasks/users.yml @@ -1,21 +1,4 @@ - name: Add users to freeipa # this falls back to the keys from ansible.builtin.user, where they're equivalent community.general.ipa_user: "{{ freeipa_user_defaults | combine(item) }}" - # name: "{{ item.name }}" - # password: "{{ item.password }}" - # # update_password: always # default - # # uidnumber: "{{ item.uidnumber | default(item.uid) }}" - # # gidnumber: "{{ item.gidnumber | default(item.uidnumber) | default(item.uid) }}" - # givenname: "{{ item.givenname }}" - # sn: "{{ item.sn }}" - # # sshpubkey: "{{ item.sshpubkey | default(item.public_key) | default(omit) }}" - # # have to set this if you don't want an immediately-expired password! - # # krbpasswordexpiration: >- - # # {{ lookup('pipe', 'date --date "1 year" +%Y%m%d%H%M%S') }} - # # ipa_host: not needed, using DNS - # ipa_pass: "{{ freeipa_admin_password | quote }}" - # ipa_user: admin - - # state: present - # userauthtype: password loop: "{{ freeipa_users }}" diff --git a/ansible/roles/freeipa/tasks/validate.yml b/ansible/roles/freeipa/tasks/validate.yml new file mode 100644 index 000000000..0da831621 --- /dev/null +++ b/ansible/roles/freeipa/tasks/validate.yml @@ -0,0 +1,29 @@ +- name: Ensure incompatible functionality is not enabled + assert: + that: groups[item] | length == 0 + fail_msg: "The group {{ item }} contains hosts - this cannot be used with freeipa enabled" + loop: + - basic_users + - etc_hosts + run_once: yes + +- name: Ensure role vars without defaults are defined + assert: + that: item is defined + fail_msg: "var {{ item }} must be defined when freeipa is enabled" + loop: + - freeipa_realm + - freeipa_ds_password + - freeipa_admin_password + - freeipa_server_ip # needed on server for installation, needed on client for 
resolv.conf + run_once: yes + +- name: Get hostname as reported by command + command: hostname + register: _freeipa_validate_hostname + changed_when: false + +- name: Ensure hostname is fully-qualified + # see section 2.7 of redhat guide to installing identity management + assert: + that: _freeipa_validate_hostname.stdout | split('.') | length >= 3 diff --git a/ansible/validate.yml b/ansible/validate.yml index 8087bceb8..08b10eea2 100644 --- a/ansible/validate.yml +++ b/ansible/validate.yml @@ -84,3 +84,11 @@ - openondemand - openondemand_server - grafana + +- name: Validate freeipa configuration + hosts: freeipa + tags: freeipa + tasks: + - import_role: + name: freeipa + tasks_from: validate.yml From 1cd2709779be45396b1c5bff8fddc7f6394a34b3 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Fri, 2 Dec 2022 11:01:06 +0000 Subject: [PATCH 09/99] don't use 'fat' image for freeipa --- environments/arcus/terraform/main.tf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/environments/arcus/terraform/main.tf b/environments/arcus/terraform/main.tf index 6c6740274..ba9146279 100644 --- a/environments/arcus/terraform/main.tf +++ b/environments/arcus/terraform/main.tf @@ -17,7 +17,8 @@ variable "create_nodes" { variable "cluster_image" { description = "single image for all cluster nodes - a convenience for CI" type = string - default = "openhpc-221118-1422.qcow2" # https://github.com/stackhpc/slurm_image_builder/pull/12 + #default = "openhpc-221118-1422.qcow2" # https://github.com/stackhpc/slurm_image_builder/pull/12 + default = "Rocky-8-GenericCloud-8.6.20220702.0.x86_64.qcow2" } module "cluster" { From 6ee0c12d11c007be4a034bcbf7034a9618a105df Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Fri, 2 Dec 2022 11:41:22 +0000 Subject: [PATCH 10/99] move freeipa before filesystems so nfs clients can find server --- ansible/site.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/site.yml b/ansible/site.yml index d754e636c..30c4c6b64 
100644 --- a/ansible/site.yml +++ b/ansible/site.yml @@ -17,11 +17,11 @@ import_playbook: "{{ hook_path if hook_path | exists else 'noop.yml' }}" when: hook_path | exists +- import_playbook: iam.yml - import_playbook: filesystems.yml - import_playbook: slurm.yml - import_playbook: portal.yml - import_playbook: monitoring.yml -- import_playbook: iam.yml - name: Run post.yml hook vars: From 398e4ae92a1303b8aed127555b086da563b33165 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Fri, 2 Dec 2022 16:44:05 +0000 Subject: [PATCH 11/99] enable freeipa reverse DNS to fix slurm --- ansible/roles/freeipa/tasks/client.yml | 1 + ansible/roles/freeipa/tasks/server.yml | 1 + 2 files changed, 2 insertions(+) diff --git a/ansible/roles/freeipa/tasks/client.yml b/ansible/roles/freeipa/tasks/client.yml index 5c8fdc8b9..a465a6416 100644 --- a/ansible/roles/freeipa/tasks/client.yml +++ b/ansible/roles/freeipa/tasks/client.yml @@ -30,6 +30,7 @@ --unattended --mkhomedir --force-join + --enable-dns-updates --principal admin --password {{ freeipa_admin_password | quote }} register: ipa_client_install diff --git a/ansible/roles/freeipa/tasks/server.yml b/ansible/roles/freeipa/tasks/server.yml index 74003c565..87db5bb9e 100644 --- a/ansible/roles/freeipa/tasks/server.yml +++ b/ansible/roles/freeipa/tasks/server.yml @@ -17,6 +17,7 @@ --admin-password {{ freeipa_admin_password | quote }} --ip-address={{ freeipa_server_ip }} --setup-dns + --auto-reverse --auto-forwarders --no-dnssec-validation --no-ntp From 5ca2a9c81840db9e7c7c91e9dad5e011fea7d421 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Tue, 6 Dec 2022 11:39:14 +0000 Subject: [PATCH 12/99] make inventory_hostname == short hostname so slurm works --- ansible/roles/freeipa/tasks/addhost.yml | 2 +- .../{{cookiecutter.environment}}/terraform/inventory.tf | 1 + .../{{cookiecutter.environment}}/terraform/inventory.tpl | 9 +++++---- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/ansible/roles/freeipa/tasks/addhost.yml 
b/ansible/roles/freeipa/tasks/addhost.yml index 94636a895..901ec6e83 100644 --- a/ansible/roles/freeipa/tasks/addhost.yml +++ b/ansible/roles/freeipa/tasks/addhost.yml @@ -1,7 +1,7 @@ - name: Add host to IPA # annoyingly this always shows as changed! community.general.ipa_host: - name: "{{ inventory_hostname }}" + name: "{{ [inventory_hostname, openhpc_cluster_name, tld] | join('.') }}" # don't want to use ansible_ variables as might not have host in play e.g. during image build ip_address: "{{ freeipa_client_ip }}" ipa_host: "{{ groups['freeipa_server'].0 }}" ipa_pass: "{{ vault_freeipa_admin_password }}" diff --git a/environments/skeleton/{{cookiecutter.environment}}/terraform/inventory.tf b/environments/skeleton/{{cookiecutter.environment}}/terraform/inventory.tf index d7298015c..a93ca0989 100644 --- a/environments/skeleton/{{cookiecutter.environment}}/terraform/inventory.tf +++ b/environments/skeleton/{{cookiecutter.environment}}/terraform/inventory.tf @@ -2,6 +2,7 @@ resource "local_file" "hosts" { content = templatefile("${path.module}/inventory.tpl", { "cluster_name": var.cluster_name, + "tld": var.tld, "control": openstack_networking_port_v2.control, "state_dir": var.state_dir, "logins": openstack_networking_port_v2.login, diff --git a/environments/skeleton/{{cookiecutter.environment}}/terraform/inventory.tpl b/environments/skeleton/{{cookiecutter.environment}}/terraform/inventory.tpl index 32022bbd8..e92cf533e 100644 --- a/environments/skeleton/{{cookiecutter.environment}}/terraform/inventory.tpl +++ b/environments/skeleton/{{cookiecutter.environment}}/terraform/inventory.tpl @@ -1,9 +1,10 @@ [all:vars] ansible_user=rocky openhpc_cluster_name=${cluster_name} +tld=${tld} [control] -${control.name} ansible_host=${control.all_fixed_ips[0]} +${split(".", control.name)[0]} ansible_host=${control.all_fixed_ips[0]} [control:vars] # NB needs to be set on group not host otherwise it is ignored in packer build! 
@@ -11,12 +12,12 @@ appliances_state_dir=${state_dir} [login] %{ for login in logins ~} -${login.name} ansible_host=${login.all_fixed_ips[0]} +${split(".", login.name)[0]} ansible_host=${login.all_fixed_ips[0]} %{ endfor ~} [compute] %{ for compute in computes ~} -${compute.name} ansible_host=${compute.all_fixed_ips[0]} +${split(".", compute.name)[0]} ansible_host=${compute.all_fixed_ips[0]} %{ endfor ~} # Define groups for slurm parititions: @@ -24,7 +25,7 @@ ${compute.name} ansible_host=${compute.all_fixed_ips[0]} [${cluster_name}_${type_name}] %{~ for node_name, node_type in compute_nodes ~} %{~ if node_type == type_name ~} -${computes[node_name].name} +${split(".", computes[node_name].name)[0]} %{~ endif ~} %{~ endfor ~} %{ endfor ~} From ab3817af059a854660fc6919a351e60cfebc4852 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Tue, 6 Dec 2022 11:42:20 +0000 Subject: [PATCH 13/99] enable short inventory_hostnames to work for etc_hosts too --- ansible/roles/etc_hosts/templates/hosts.j2 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ansible/roles/etc_hosts/templates/hosts.j2 b/ansible/roles/etc_hosts/templates/hosts.j2 index 7cdf11040..71bb47c0d 100644 --- a/ansible/roles/etc_hosts/templates/hosts.j2 +++ b/ansible/roles/etc_hosts/templates/hosts.j2 @@ -1,6 +1,6 @@ 127.0.0.1 localhost localhost.localdomain localhost4 localhost4.localdomain4 ::1 localhost localhost.localdomain localhost6 localhost6.localdomain6 -{% for hostname in groups['etc_hosts'] | sort -%} -{{ hostvars[hostname]['ansible_host'] }} {{ hostname }} +{% for inventory_hostname in groups['etc_hosts'] | sort -%} +{{ hostvars[inventory_hostname]['ansible_host'] }} {% if tld is defined %}{{ inventory_hostname }}.{{ openhpc_cluster_name }}.{{ tld }}{% endif %} {{ inventory_hostname }} {% endfor -%} From 9db701c259e0ad3fbf2c312d03f460d3919edba6 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Tue, 13 Dec 2022 11:39:00 +0000 Subject: [PATCH 14/99] validate TLD exists for freeipa 
--- ansible/roles/freeipa/tasks/validate.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ansible/roles/freeipa/tasks/validate.yml b/ansible/roles/freeipa/tasks/validate.yml index 0da831621..612086da4 100644 --- a/ansible/roles/freeipa/tasks/validate.yml +++ b/ansible/roles/freeipa/tasks/validate.yml @@ -7,7 +7,7 @@ - etc_hosts run_once: yes -- name: Ensure role vars without defaults are defined +- name: Ensure role vars without defaults and TLD are defined assert: that: item is defined fail_msg: "var {{ item }} must be defined when freeipa is enabled" @@ -16,6 +16,7 @@ - freeipa_ds_password - freeipa_admin_password - freeipa_server_ip # needed on server for installation, needed on client for resolv.conf + - tld run_once: yes - name: Get hostname as reported by command From b3cd98f568c9803c33df8b69ca1434d8dbf154ba Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Tue, 13 Dec 2022 16:38:57 +0000 Subject: [PATCH 15/99] increase control node size on arcus/CI --- environments/arcus/terraform/main.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environments/arcus/terraform/main.tf b/environments/arcus/terraform/main.tf index ba9146279..1e238f00c 100644 --- a/environments/arcus/terraform/main.tf +++ b/environments/arcus/terraform/main.tf @@ -30,7 +30,7 @@ module "cluster" { vnic_type = "direct" key_pair = "slurm-app-ci" control_node = { - flavor: "vm.alaska.cpu.general.small" + flavor: "vm.alaska.cpu.general.quarter" image: var.cluster_image } login_nodes = { From 3722450fa3edf742825b3b31286e327e6755778f Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Tue, 13 Dec 2022 16:43:24 +0000 Subject: [PATCH 16/99] tidy OOD auth defaults --- ansible/roles/openondemand/defaults/main.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ansible/roles/openondemand/defaults/main.yml b/ansible/roles/openondemand/defaults/main.yml index 23359f01c..e00cc06d3 100644 --- a/ansible/roles/openondemand/defaults/main.yml +++ 
b/ansible/roles/openondemand/defaults/main.yml @@ -4,13 +4,17 @@ openondemand_auth: # "oidc" or "basic_pam" openondemand_mapping_users: [] -## Variables for `openondemand_auth=oidc` : +# Variables for openondemand_auth='oidc' openondemand_oidc_client_id: openondemand_oidc_client_secret: openondemand_oidc_provider_url: openondemand_oidc_crypto_passphrase: openondemand_oidc_remote_user_claim: preferred_username openondemand_oidc_scope: openid profile preferred_username +openondemand_mapping_users: [] + +# Variables for openondemand_auth='basic_pam' +# None. # SSL Certificates openondemand_ssl_cert: /etc/pki/tls/certs/localhost.crt From 244d1e67a705c7a9fc08a92270cf5bae837e7900 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Tue, 13 Dec 2022 16:53:29 +0000 Subject: [PATCH 17/99] handle tld in rebuild adhoc --- ansible/adhoc/rebuild.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ansible/adhoc/rebuild.yml b/ansible/adhoc/rebuild.yml index c30737fd6..e7e37633a 100644 --- a/ansible/adhoc/rebuild.yml +++ b/ansible/adhoc/rebuild.yml @@ -13,6 +13,8 @@ become: no gather_facts: no tasks: - - command: "openstack server rebuild {{ instance_id | default(inventory_hostname) }}{% if rebuild_image is defined %} --image {{ rebuild_image }}{% endif %}" + - command: "openstack server rebuild {{ instance_id | default(instance_name) }}{% if rebuild_image is defined %} --image {{ rebuild_image }}{% endif %}" delegate_to: localhost + vars: + instance_name: "{{ [inventory_hostname, openhpc_cluster_name, tld] | join('.') if tld is defined else inventory_hostname }}" - wait_for_connection: From ba47527e3078bd4f1186071a83125014e5b1a408 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Tue, 13 Dec 2022 16:54:21 +0000 Subject: [PATCH 18/99] don't force enrole freeipa clients - needs separate work --- ansible/roles/freeipa/tasks/client.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/ansible/roles/freeipa/tasks/client.yml 
b/ansible/roles/freeipa/tasks/client.yml index a465a6416..a8d866b01 100644 --- a/ansible/roles/freeipa/tasks/client.yml +++ b/ansible/roles/freeipa/tasks/client.yml @@ -18,18 +18,17 @@ mode: u=rw,og=r - name: Enrole with FreeIPA - # reenrolment requires --force-join and --password or --keytab + # reenrolment requires --force-join and --password, or --keytab # renrolement means: # 1. A new host certificate is issued # 2. The old host certificate is revoked # 3. New SSH keys are generated # 4. ipaUniqueID is preserved - # --password is overloaded - its bulkpassword unless --prinicpal is used in which case it's admin password + # --password is overloaded - its bulkpassword unless --principal or --force-join is used in which case it's admin password command: > ipa-client-install --unattended --mkhomedir - --force-join --enable-dns-updates --principal admin --password {{ freeipa_admin_password | quote }} From a540243f8725fb70c353f8d06a59d1e259e85e42 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Thu, 5 Jan 2023 11:39:18 +0000 Subject: [PATCH 19/99] make provision of DNS from freeipa optional --- ansible/roles/freeipa/defaults/main.yml | 1 + ansible/roles/freeipa/tasks/server.yml | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/ansible/roles/freeipa/defaults/main.yml b/ansible/roles/freeipa/defaults/main.yml index 342ef2496..4e41c6d39 100644 --- a/ansible/roles/freeipa/defaults/main.yml +++ b/ansible/roles/freeipa/defaults/main.yml @@ -3,6 +3,7 @@ freeipa_domain: "{{ freeipa_realm | lower }}" #freeipa_ds_password: #freeipa_admin_password: #freeipa_server_ip: +freeipa_setup_dns: false freeipa_client_ip: "{{ ansible_host }}" # when run on freeipa_client group! 
# freeipa_client_randompassword: # auto set freeipa_user_defaults: diff --git a/ansible/roles/freeipa/tasks/server.yml b/ansible/roles/freeipa/tasks/server.yml index 87db5bb9e..2f14e1a6f 100644 --- a/ansible/roles/freeipa/tasks/server.yml +++ b/ansible/roles/freeipa/tasks/server.yml @@ -16,7 +16,7 @@ --ds-password {{ freeipa_ds_password | quote }} --admin-password {{ freeipa_admin_password | quote }} --ip-address={{ freeipa_server_ip }} - --setup-dns + {% if freeipa_setup_dns | bool %}--setup-dns{% endif %} --auto-reverse --auto-forwarders --no-dnssec-validation From ef995094d863efdd3001125478ad4cc28261cba0 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Tue, 10 Jan 2023 08:43:15 +0000 Subject: [PATCH 20/99] separate freeipa server & client (using random password) and add resolv.conf control --- ansible/bootstrap.yml | 47 +++++++++++++ ansible/iam.yml | 13 +--- ansible/roles/freeipa/defaults/main.yml | 4 +- ansible/roles/freeipa/tasks/addhost.yml | 14 ++-- ansible/roles/freeipa/tasks/client.yml | 69 ++++++++++++++----- ansible/roles/freeipa/tasks/users.yml | 1 - ansible/roles/freeipa/tasks/validate.yml | 12 ---- environments/arcus/inventory/extra_groups | 7 ++ .../inventory/group_vars/all/freeipa.yml | 1 + .../inventory/group_vars/all/defaults.yml | 4 +- .../{freeipa.yml => all/freeipa_server.yml} | 2 + environments/common/inventory/groups | 7 +- environments/common/layouts/everything | 8 --- 13 files changed, 129 insertions(+), 60 deletions(-) rename environments/common/inventory/group_vars/{freeipa.yml => all/freeipa_server.yml} (67%) diff --git a/ansible/bootstrap.yml b/ansible/bootstrap.yml index 4c86ee157..b7189bdfa 100644 --- a/ansible/bootstrap.yml +++ b/ansible/bootstrap.yml @@ -13,6 +13,26 @@ to update these variable names. 
** NB: The actual secrets will not be changed.** when: "'secrets_openhpc_' in (hostvars[inventory_hostname] | join)" +- hosts: all + gather_facts: false + tags: resolv_conf + become: yes + tasks: + - name: Set nameservers in /etc/resolv.conf + copy: + content: | + # Created by slurm appliance ansible/bootstrap.yml + search {{ openhpc_cluster_name }}.{{ tld }} + + {% for ns in appliances_nameservers[0:3] %} + nameserver {{ ns }} + {% endfor %} + dest: /etc/resolv.conf + owner: root + group: root + mode: u=rw,og=r + when: appliances_nameservers | length > 0 + - hosts: etc_hosts gather_facts: false tags: etc_hosts @@ -71,6 +91,33 @@ policy: "{{ selinux_policy }}" register: sestatus +- hosts: freeipa_server + # done here as it might be providing DNS + tags: + - freeipa + - freeipa_server + gather_facts: yes + become: yes + tasks: + - name: Install freeipa server + import_role: + name: freeipa + tasks_from: server.yml + +- hosts: freeipa_client + tags: + - freeipa + - freeipa_server # as this is only relevant if using freeipa_server + gather_facts: no + become: yes + tasks: + - import_role: + name: freeipa + tasks_from: addhost.yml + when: groups['freeipa_server'] | length > 0 + +# --- tasks after here require access to package repos --- + - hosts: firewalld gather_facts: false become: yes diff --git a/ansible/iam.yml b/ansible/iam.yml index 423f0b740..6e85f62ba 100644 --- a/ansible/iam.yml +++ b/ansible/iam.yml @@ -3,31 +3,22 @@ - freeipa - freeipa_server gather_facts: yes + become: yes tasks: - - import_role: - name: freeipa - tasks_from: server.yml - become: yes - import_role: name: freeipa tasks_from: users.yml - tags: users - become: yes - hosts: freeipa_client tags: - freeipa - freeipa_client gather_facts: yes + become: yes tasks: - - import_role: - name: freeipa - tasks_from: addhost.yml - become: yes - import_role: name: freeipa tasks_from: client.yml - become: yes - hosts: basic_users become: yes diff --git a/ansible/roles/freeipa/defaults/main.yml 
b/ansible/roles/freeipa/defaults/main.yml index 4e41c6d39..69949a6c6 100644 --- a/ansible/roles/freeipa/defaults/main.yml +++ b/ansible/roles/freeipa/defaults/main.yml @@ -5,8 +5,10 @@ freeipa_domain: "{{ freeipa_realm | lower }}" #freeipa_server_ip: freeipa_setup_dns: false freeipa_client_ip: "{{ ansible_host }}" # when run on freeipa_client group! -# freeipa_client_randompassword: # auto set +# freeipa_host_password: freeipa_user_defaults: ipa_pass: "{{ freeipa_admin_password | quote }}" ipa_user: admin freeipa_users: [] # see community.general.ipa_user + +# _freeipa_keytab_backup_path: "{{ appliances_state_dir | default ('') }}/freeipa/{{ inventory_hostname }}/" # must end in / to ensure directory creation TODO: FIXME diff --git a/ansible/roles/freeipa/tasks/addhost.yml b/ansible/roles/freeipa/tasks/addhost.yml index 901ec6e83..92544e318 100644 --- a/ansible/roles/freeipa/tasks/addhost.yml +++ b/ansible/roles/freeipa/tasks/addhost.yml @@ -1,3 +1,10 @@ +- name: Ensure freeipa_server_ip is first in freeipa_client host's resolv.conf + lineinfile: + path: /etc/resolv.conf + line: nameserver {{ freeipa_server_ip }} + firstmatch: true + insertbefore: '^nameserver' + - name: Add host to IPA # annoyingly this always shows as changed! 
community.general.ipa_host: @@ -11,8 +18,7 @@ validate_certs: false delegate_to: "{{ groups['freeipa_server'].0 }}" register: _ipa_host_add -- debug: - var: _ipa_host_add -- set_fact: - freeipa_client_randompassword: "{{ _ipa_host_add.host.randompassword }}" # could use ipa host-mod --random to set a new one, if necessary +- name: Set fact for ipa host password + set_fact: + freeipa_host_password: "{{ _ipa_host_add.host.randompassword }}" diff --git a/ansible/roles/freeipa/tasks/client.yml b/ansible/roles/freeipa/tasks/client.yml index a8d866b01..cdcde78a2 100644 --- a/ansible/roles/freeipa/tasks/client.yml +++ b/ansible/roles/freeipa/tasks/client.yml @@ -16,24 +16,55 @@ owner: root group: root mode: u=rw,og=r + when: freeipa_setup_dns | bool -- name: Enrole with FreeIPA - # reenrolment requires --force-join and --password, or --keytab - # renrolement means: - # 1. A new host certificate is issued - # 2. The old host certificate is revoked - # 3. New SSH keys are generated - # 4. ipaUniqueID is preserved - # --password is overloaded - its bulkpassword unless --principal or --force-join is used in which case it's admin password - command: > - ipa-client-install - --unattended - --mkhomedir - --enable-dns-updates - --principal admin - --password {{ freeipa_admin_password | quote }} - register: ipa_client_install - changed_when: ipa_client_install.rc == 0 +# TODO: FIXME: +# - name: Check for keytab from previous enrolement +# stat: +# path: "{{ _freeipa_keytab_backup_path }}krb5.keytab" +# register: _stat_backup_keytab + +# - name: Renrole with FreeIPA using backed-up keytab +# # reenrolment requires --force-join and --password, or --keytab +# # renrolement means: +# # 1. A new host certificate is issued +# # 2. The old host certificate is revoked +# # 3. New SSH keys are generated +# # 4. ipaUniqueID is preserved +# # and ALSO that the keytab is changed! 
+# command: +# cmd: > +# ipa-client-install +# --unattended +# --mkhomedir +# --enable-dns-updates +# --keytab {{ _freeipa_keytab_backup_path }}krb5.keytab +# when: _stat_backup_keytab.stat.exists +# register: ipa_client_install_keytab +# changed_when: ipa_client_install_keytab.rc == 0 +# failed_when: > +# ipa_client_install_keytab.rc !=0 and +# 'IPA client is already configured' not in ipa_client_install_keytab.stderr +# # TODO: find a way to NOT reenrole, if necessary?? + +- name: Enrole with FreeIPA using random password + # Note --password is overloaded - it's bulkpassword unless --principal or --force-join is used in which case it's admin password + command: + cmd: > + ipa-client-install + --unattended + --mkhomedir + --enable-dns-updates + --password '{{ freeipa_host_password }}' + register: ipa_client_install_password + changed_when: ipa_client_install_password.rc == 0 failed_when: > - ipa_client_install.rc != 0 and - 'IPA client is already configured' not in ipa_client_install.stderr + ipa_client_install_password.rc != 0 and + 'IPA client is already configured' not in ipa_client_install_password.stderr + +- name: Backup keytab + copy: + remote_src: true + src: /etc/krb5.keytab + dest: "{{ _freeipa_keytab_backup_path }}" + when: appliances_state_dir is defined diff --git a/ansible/roles/freeipa/tasks/users.yml b/ansible/roles/freeipa/tasks/users.yml index d35d2ab9f..b6f97d1f1 100644 --- a/ansible/roles/freeipa/tasks/users.yml +++ b/ansible/roles/freeipa/tasks/users.yml @@ -1,4 +1,3 @@ - name: Add users to freeipa - # this falls back to the keys from ansible.builtin.user, where they're equivalent community.general.ipa_user: "{{ freeipa_user_defaults | combine(item) }}" loop: "{{ freeipa_users }}" diff --git a/ansible/roles/freeipa/tasks/validate.yml b/ansible/roles/freeipa/tasks/validate.yml index 612086da4..7af214c62 100644 --- a/ansible/roles/freeipa/tasks/validate.yml +++ b/ansible/roles/freeipa/tasks/validate.yml @@ -7,18 +7,6 @@ - etc_hosts run_once: yes 
-- name: Ensure role vars without defaults and TLD are defined - assert: - that: item is defined - fail_msg: "var {{ item }} must be defined when freeipa is enabled" - loop: - - freeipa_realm - - freeipa_ds_password - - freeipa_admin_password - - freeipa_server_ip # needed on server for installation, needed on client for resolv.conf - - tld - run_once: yes - - name: Get hostname as reported by command command: hostname register: _freeipa_validate_hostname diff --git a/environments/arcus/inventory/extra_groups b/environments/arcus/inventory/extra_groups index f9ac21530..a688d8c9d 100644 --- a/environments/arcus/inventory/extra_groups +++ b/environments/arcus/inventory/extra_groups @@ -1,3 +1,10 @@ [rebuild:children] control compute + +[freeipa_server:children] +control + +[freeipa_client:children] +login +compute diff --git a/environments/arcus/inventory/group_vars/all/freeipa.yml b/environments/arcus/inventory/group_vars/all/freeipa.yml index 47f59cfb8..0a271396f 100644 --- a/environments/arcus/inventory/group_vars/all/freeipa.yml +++ b/environments/arcus/inventory/group_vars/all/freeipa.yml @@ -1,3 +1,4 @@ +freeipa_setup_dns: true freeipa_users: - name: testuser # can't use rocky as $HOME isn't shared! 
password: "{{ vault_testuser_password }}" diff --git a/environments/common/inventory/group_vars/all/defaults.yml b/environments/common/inventory/group_vars/all/defaults.yml index 1a3f6b421..5fabe40bb 100644 --- a/environments/common/inventory/group_vars/all/defaults.yml +++ b/environments/common/inventory/group_vars/all/defaults.yml @@ -3,8 +3,8 @@ ansible_user: rocky appliances_repository_root: "{{ lookup('env', 'APPLIANCES_REPO_ROOT') }}" appliances_environment_root: "{{ lookup('env', 'APPLIANCES_ENVIRONMENT_ROOT') }}" -#appliances_state_dir: # define an absolute path here to use for persistent state -releasever: '8.6' +#appliances_state_dir: # define an absolute path here to use for persistent state: NB: This is defined as /var/lib/state in inventory by the default Terraform +appliances_nameservers: [] # if non-empty, supply up to 3 nameserver addresses to replace content of /etc/resolv.conf # Address(ip/dns) for internal communication between services. This is # normally traffic you do no want to expose to users. diff --git a/environments/common/inventory/group_vars/freeipa.yml b/environments/common/inventory/group_vars/all/freeipa_server.yml similarity index 67% rename from environments/common/inventory/group_vars/freeipa.yml rename to environments/common/inventory/group_vars/all/freeipa_server.yml index 31212b2a7..53ab3964e 100644 --- a/environments/common/inventory/group_vars/freeipa.yml +++ b/environments/common/inventory/group_vars/all/freeipa_server.yml @@ -1,3 +1,5 @@ +# See ansible/roles/freeipa/README.md +# These vars are only used when freeipa_server is enabled. 
They are not required when enabling only freeipa_client freeipa_realm: "{{ openhpc_cluster_name | upper }}.INVALID" freeipa_ds_password: "{{ vault_freeipa_ds_password }}" freeipa_admin_password: "{{ vault_freeipa_admin_password }}" diff --git a/environments/common/inventory/groups b/environments/common/inventory/groups index 0c15a694c..6693c0b77 100644 --- a/environments/common/inventory/groups +++ b/environments/common/inventory/groups @@ -106,7 +106,7 @@ control prometheus [freeipa_server] -# Hosts to be a FreeIPA server. See ansible/roles/freeipa/README.md +# Hosts to be a FreeIPA server. **NB**: Intended only for test/development use. See ansible/roles/freeipa/README.md [freeipa_client] # Hosts to be a FreeIPA client. See ansible/roles/freeipa/README.md @@ -114,4 +114,7 @@ prometheus [freeipa:children] # Allows defining variables common to freeipa_server and _client freeipa_server -freeipa_client \ No newline at end of file +freeipa_client + +[resolv_conf] +# Allows defining nameservers in /etc/resolv.conf - see ansible/bootstrap.yml diff --git a/environments/common/layouts/everything b/environments/common/layouts/everything index 66ba46c58..6e2538cc4 100644 --- a/environments/common/layouts/everything +++ b/environments/common/layouts/everything @@ -54,11 +54,3 @@ compute [etc_hosts] # Hosts to manage /etc/hosts e.g. if no internal DNS. See ansible/roles/etc_hosts/README.md - -[freeipa_server:children] -# Hosts to be a FreeIPA server. 
See ansible/roles/freeipa/README.md -control - -[freeipa_client:children] -login -compute From f04bafa0ecdc9cb086ace7f1657a5df1d7944faf Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Tue, 10 Jan 2023 15:06:13 +0000 Subject: [PATCH 21/99] use ansible to save/restore client freeipa keytab from control's statedir --- ansible/roles/freeipa/defaults/main.yml | 2 +- ansible/roles/freeipa/tasks/client.yml | 90 ++++++++++++++++--------- 2 files changed, 60 insertions(+), 32 deletions(-) diff --git a/ansible/roles/freeipa/defaults/main.yml b/ansible/roles/freeipa/defaults/main.yml index 69949a6c6..74c0e7ef0 100644 --- a/ansible/roles/freeipa/defaults/main.yml +++ b/ansible/roles/freeipa/defaults/main.yml @@ -11,4 +11,4 @@ freeipa_user_defaults: ipa_user: admin freeipa_users: [] # see community.general.ipa_user -# _freeipa_keytab_backup_path: "{{ appliances_state_dir | default ('') }}/freeipa/{{ inventory_hostname }}/" # must end in / to ensure directory creation TODO: FIXME +_freeipa_keytab_backup_path: "{{ hostvars[groups['control'].0].appliances_state_dir }}/freeipa/{{ inventory_hostname }}/krb5.keytab" diff --git a/ansible/roles/freeipa/tasks/client.yml b/ansible/roles/freeipa/tasks/client.yml index cdcde78a2..c4c85379e 100644 --- a/ansible/roles/freeipa/tasks/client.yml +++ b/ansible/roles/freeipa/tasks/client.yml @@ -18,34 +18,46 @@ mode: u=rw,og=r when: freeipa_setup_dns | bool -# TODO: FIXME: -# - name: Check for keytab from previous enrolement -# stat: -# path: "{{ _freeipa_keytab_backup_path }}krb5.keytab" -# register: _stat_backup_keytab +- name: Retrieve persisted keytab from previous enrolement + slurp: + src: "{{ _freeipa_keytab_backup_path }}" + delegate_to: control + register: _slurp_persisted_keytab + failed_when: false -# - name: Renrole with FreeIPA using backed-up keytab -# # reenrolment requires --force-join and --password, or --keytab -# # renrolement means: -# # 1. A new host certificate is issued -# # 2. The old host certificate is revoked -# # 3. 
New SSH keys are generated -# # 4. ipaUniqueID is preserved -# # and ALSO that the keytab is changed! -# command: -# cmd: > -# ipa-client-install -# --unattended -# --mkhomedir -# --enable-dns-updates -# --keytab {{ _freeipa_keytab_backup_path }}krb5.keytab -# when: _stat_backup_keytab.stat.exists -# register: ipa_client_install_keytab -# changed_when: ipa_client_install_keytab.rc == 0 -# failed_when: > -# ipa_client_install_keytab.rc !=0 and -# 'IPA client is already configured' not in ipa_client_install_keytab.stderr -# # TODO: find a way to NOT reenrole, if necessary?? +- debug: + var: _slurp_persisted_keytab + +- name: Write persisted keytab from previous enrolment + copy: + content: "{{ _slurp_persisted_keytab.content | b64decode }}" + dest: /tmp/krb5.keytab + owner: root + group: root + mode: ug=rw,o= + when: '"content" in _slurp_persisted_keytab' + +- name: Renrole with FreeIPA using backed-up keytab + # reenrolment requires --force-join and --password, or --keytab + # renrolement means: + # 1. A new host certificate is issued + # 2. The old host certificate is revoked + # 3. New SSH keys are generated + # 4. ipaUniqueID is preserved + # and ALSO that the keytab is changed! 
+ command: + cmd: > + ipa-client-install + --unattended + --mkhomedir + --enable-dns-updates + --keytab /tmp/krb5.keytab + when: '"content" in _slurp_persisted_keytab' + register: ipa_client_install_keytab + changed_when: ipa_client_install_keytab.rc == 0 + failed_when: > + ipa_client_install_keytab.rc !=0 and + 'IPA client is already configured' not in ipa_client_install_keytab.stderr - name: Enrole with FreeIPA using random password # Note --password is overloaded - it's bulkpassword unless --principal or --force-join is used in which case it's admin password @@ -56,15 +68,31 @@ --mkhomedir --enable-dns-updates --password '{{ freeipa_host_password }}' + when: '"content" not in _slurp_persisted_keytab' register: ipa_client_install_password changed_when: ipa_client_install_password.rc == 0 failed_when: > ipa_client_install_password.rc != 0 and 'IPA client is already configured' not in ipa_client_install_password.stderr -- name: Backup keytab - copy: - remote_src: true +- name: Retrieve current keytab + slurp: src: /etc/krb5.keytab + register: _slurp_current_keytab + failed_when: false + +- name: Ensure keytab backup directory exists + file: + path: "{{ _freeipa_keytab_backup_path | dirname }}" + state: directory + owner: root + group: root + mode: ug=wrX,o= + delegate_to: control + +- name: Persist keytab + copy: + content: "{{ _slurp_current_keytab.content | b64decode }}" dest: "{{ _freeipa_keytab_backup_path }}" - when: appliances_state_dir is defined + delegate_to: control + # when: appliances_state_dir is defined From fa067ac1b83d2a6c97a1c53633121344edcd4573 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Tue, 10 Jan 2023 15:52:11 +0000 Subject: [PATCH 22/99] auto-set freeipa_setup_dns --- ansible/roles/freeipa/defaults/main.yml | 2 +- environments/arcus/inventory/group_vars/all/freeipa.yml | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/ansible/roles/freeipa/defaults/main.yml b/ansible/roles/freeipa/defaults/main.yml index 
74c0e7ef0..03b844c8a 100644 --- a/ansible/roles/freeipa/defaults/main.yml +++ b/ansible/roles/freeipa/defaults/main.yml @@ -3,7 +3,7 @@ freeipa_domain: "{{ freeipa_realm | lower }}" #freeipa_ds_password: #freeipa_admin_password: #freeipa_server_ip: -freeipa_setup_dns: false +freeipa_setup_dns: "{{ groups['freeipa_server'] | length > 0 }}" freeipa_client_ip: "{{ ansible_host }}" # when run on freeipa_client group! # freeipa_host_password: freeipa_user_defaults: diff --git a/environments/arcus/inventory/group_vars/all/freeipa.yml b/environments/arcus/inventory/group_vars/all/freeipa.yml index 0a271396f..47f59cfb8 100644 --- a/environments/arcus/inventory/group_vars/all/freeipa.yml +++ b/environments/arcus/inventory/group_vars/all/freeipa.yml @@ -1,4 +1,3 @@ -freeipa_setup_dns: true freeipa_users: - name: testuser # can't use rocky as $HOME isn't shared! password: "{{ vault_testuser_password }}" From 63962910525a634184caeba0efe70e6b95eb4e3c Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Tue, 10 Jan 2023 15:52:46 +0000 Subject: [PATCH 23/99] add freeipa README --- ansible/roles/freeipa/README.md | 53 +++++++++++++++++++++++++++++---- 1 file changed, 47 insertions(+), 6 deletions(-) diff --git a/ansible/roles/freeipa/README.md b/ansible/roles/freeipa/README.md index bac686ec9..4cf69b759 100644 --- a/ansible/roles/freeipa/README.md +++ b/ansible/roles/freeipa/README.md @@ -1,10 +1,51 @@ +# freeipa +Support FreeIPA in the appliance. In production use it is expected the FreeIPA server(s) will be external to the cluster, implying that hosts and users are managed outside the appliance. However for testing and development the role can also deploy a FreeIPA server, add hosts to it and manage users on that host. -# Users +# FreeIPA Client -The dicts in `freeipa_users` take any parameters for the [community.general.ipa_user](https://docs.ansible.com/ansible/latest/collections/community/general/ipa_user_module.html#ansible-collections-community-general-ipa-user-module). 
Note that: - - Parameters `name`, `givenname` (firstname) and `sn` (surname) are required. - - Parameters `ipa_pass` and `ipa_user` are automatically set by the role. - - The uid and gid are automatically set by FreeIPA. - - If `password` is set, the value should *not* be a hash (unlike `ansible.builtin.user` as used by the `basic_users` role), and it must be changed on first login unless `krbpasswordexpiration` is set to some future date. +## Usage +- Add hosts to the `freeipa_client` group and run (at a minimum) the `iam.yml` playbook. +- Host names must match the domain name. By default (using the skeleton Terraform) hostnames are of the form `nodename.cluster_name.tld` where `cluster_name` and `tld` are Terraform variables. +- Hosts discover the FreeIPA server from DNS records. If using an external FreeIPA server and the default nameservers do not have these records, the external FreeIPA server could be used as the nameserver directly by setting `freeipa_setup_dns: true` and `freeipa_server_ip`. +- For production use (i.e. with an external FreeIPA server), a random one-time password (OTP) should be generated when adding hosts to FreeIPA (e.g. using `ipa host-add --random ...`). This password should be set as a hostvar `freeipa_host_password`. Initial host enrolment will use this OTP to enrole the host. After this it becomes irrelevant so it does not need to be committed to git. This approach means the appliance does not require the FreeIPA administrator password. +- The `control` host must define `appliances_state_dir` on persistent storage. This is used to backup keytabs to allow FreeIPA clients to be renroled after e.g. reimaging. Note that: + - This is implemented when using the skeleton Terraform; on the control node `appliances_state_dir` defaults to `/var/lib/state` which is mounted from a volume. 
+ - Nodes are not re-enroled by a Slurm-driven reimage (see the [rebuild role's readme](../../collections/ansible_collections/stackhpc/slurm_openstack_tools/roles/rebuild/README.md)) as that does not run this role. + + +## Role Variables for Clients + +- `freeipa_host_password`. Required for initial enrolment only, freeIPA host password as described above. +- `freeipa_setup_dns`: Optional, whether to use the FreeIPA server as the client's nameserver. Defaults to `true` when `freeipa_server` contains a host, otherwise `false`. +- `freeipa_server_ip`: IP address of FreeIPA server. Only required for client if `freeipa_setup_dns` is true. Default in common environment is `ansible_host` of `freeipa_server` host. + +See also use of `appliances_state_dir` on the control node above. + +# FreeIPA Server +As noted above this is only intended for development and testing. + +## Usage +- Add a single host to the `freeipa_server` group. +- As well as configuring the FreeIPA server, the role will also: + - Automatically configure the FreeIPA server as the first nameserver for `freeipa_client` hosts. + - Add ansible hosts in the group `freeipa_client` as FreeIPA hosts. + - Optionally control users in FreeIPA - see `freeipa_users` below. +- The `server.yml` playbook should be run on the `freeipa_server` host and the `addhost.yml` task file on the `freeipa_client` hosts (but only when a host is in `freeipa_server`). See `bootstrap.yml` for examples. + +## Role Variables for Server + +These role variables are only required when using `freeipa_server`: + +- `freeipa_realm`: Optional, name of realm. Default is `{{ openhpc_cluster_name | upper }}.INVALID` +- `freeipa_domain`: Optional, name of domain. Default is lowercased `freeipa_realm`. +- `freeipa_ds_password`: Optional, password to be used by the Directory Server for the Directory Manager user (`ipa-server-install --ds-password`). 
Default is generated in `environments//inventory/group_vars/all/secrets.yml` +- `freeipa_admin_password`: Optional, password for the IPA `admin` user. Default is generated as for `freeipa_ds_password`. +- `freeipa_server_ip`: Optional, IP address of freeipa_server host. Default is `ansible_host` of the `freeipa_server` host. +- `freeipa_setup_dns`: Optional bool, whether to configure the FreeIPA server as an integrated DNS server and define a zone and records. NB: This also controls whether `freeipa_client` hosts use the `freeipa_server` host for name resolution. Default `true` when `freeipa_server` contains a host. +- `freeipa_client_ip`: Optional, IP address of FreeIPA client. Default is `ansible_host`. +- `freeipa_users`: A list of dicts as per parameters for [community.general.ipa_user](https://docs.ansible.com/ansible/latest/collections/community/general/ipa_user_module.html). Note that: + - `name`, `givenname` (firstname) and `sn` (surname) are required. + - `ipa_pass` and `ipa_user` are automatically supplied. + - If `password` is set, the value should *not* be a hash (unlike `ansible.builtin.user` as used by the `basic_users` role), and it must be changed on first login. `krbpasswordexpiration` does not appear to be able to override this. From 2a59df0fec1c084d61fef797daad7be768274ce9 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Tue, 10 Jan 2023 16:14:09 +0000 Subject: [PATCH 24/99] Revert "tidy OOD auth defaults" This reverts commit 3722450fa3edf742825b3b31286e327e6755778f. 
--- ansible/roles/openondemand/defaults/main.yml | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/ansible/roles/openondemand/defaults/main.yml b/ansible/roles/openondemand/defaults/main.yml index e00cc06d3..23359f01c 100644 --- a/ansible/roles/openondemand/defaults/main.yml +++ b/ansible/roles/openondemand/defaults/main.yml @@ -4,17 +4,13 @@ openondemand_auth: # "oidc" or "basic_pam" openondemand_mapping_users: [] -# Variables for openondemand_auth='oidc' +## Variables for `openondemand_auth=oidc` : openondemand_oidc_client_id: openondemand_oidc_client_secret: openondemand_oidc_provider_url: openondemand_oidc_crypto_passphrase: openondemand_oidc_remote_user_claim: preferred_username openondemand_oidc_scope: openid profile preferred_username -openondemand_mapping_users: [] - -# Variables for openondemand_auth='basic_pam' -# None. # SSL Certificates openondemand_ssl_cert: /etc/pki/tls/certs/localhost.crt From 97955af6ae7bb218f76cac4fe2cc25aa8d66eeca Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Tue, 17 Jan 2023 14:47:12 +0000 Subject: [PATCH 25/99] tweak freeipa README --- ansible/roles/freeipa/README.md | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/ansible/roles/freeipa/README.md b/ansible/roles/freeipa/README.md index 4cf69b759..7016d7959 100644 --- a/ansible/roles/freeipa/README.md +++ b/ansible/roles/freeipa/README.md @@ -6,22 +6,23 @@ Support FreeIPA in the appliance. In production use it is expected the FreeIPA s # FreeIPA Client ## Usage -- Add hosts to the `freeipa_client` group and run (at a minimum) the `iam.yml` playbook. +- Add hosts to the `freeipa_client` group and run (at a minimum) the `ansible/iam.yml` playbook. - Host names must match the domain name. By default (using the skeleton Terraform) hostnames are of the form `nodename.cluster_name.tld` where `cluster_name` and `tld` are Terraform variables. - Hosts discover the FreeIPA server from DNS records. 
If using an external FreeIPA server and the default nameservers do not have these records, the external FreeIPA server could be used as the nameserver directly by setting `freeipa_setup_dns: true` and `freeipa_server_ip`. - For production use (i.e. with an external FreeIPA server), a random one-time password (OTP) should be generated when adding hosts to FreeIPA (e.g. using `ipa host-add --random ...`). This password should be set as a hostvar `freeipa_host_password`. Initial host enrolment will use this OTP to enrole the host. After this it becomes irrelevant so it does not need to be committed to git. This approach means the appliance does not require the FreeIPA administrator password. -- The `control` host must define `appliances_state_dir` on persistent storage. This is used to backup keytabs to allow FreeIPA clients to be renroled after e.g. reimaging. Note that: +- The `control` host must define `appliances_state_dir` (on persistent storage). This is used to backup keytabs to allow FreeIPA clients to be renroled after e.g. reimaging. Note that: - This is implemented when using the skeleton Terraform; on the control node `appliances_state_dir` defaults to `/var/lib/state` which is mounted from a volume. - Nodes are not re-enroled by a Slurm-driven reimage (see the [rebuild role's readme](../../collections/ansible_collections/stackhpc/slurm_openstack_tools/roles/rebuild/README.md)) as that does not run this role. + - If both a backed-up keytab and `freeipa_host_password` exist, the former is used. ## Role Variables for Clients -- `freeipa_host_password`. Required for initial enrolment only, freeIPA host password as described above. +- `freeipa_host_password`. Required for initial enrolment only, FreeIPA host password as described above. - `freeipa_setup_dns`: Optional, whether to use the FreeIPA server as the client's nameserver. Defaults to `true` when `freeipa_server` contains a host, otherwise `false`. - `freeipa_server_ip`: IP address of FreeIPA server. 
Only required for client if `freeipa_setup_dns` is true. Default in common environment is `ansible_host` of `freeipa_server` host. -See also use of `appliances_state_dir` on the control node above. +See also use of `appliances_state_dir` on the control node as described above. # FreeIPA Server As noted above this is only intended for development and testing. From ec41e8b52767259f98a2998980e00c9f445d9df8 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Thu, 19 Jan 2023 16:57:38 +0000 Subject: [PATCH 26/99] improve validation output when hostname is not fqdn --- ansible/roles/freeipa/tasks/validate.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/ansible/roles/freeipa/tasks/validate.yml b/ansible/roles/freeipa/tasks/validate.yml index 7af214c62..2daf30c9a 100644 --- a/ansible/roles/freeipa/tasks/validate.yml +++ b/ansible/roles/freeipa/tasks/validate.yml @@ -16,3 +16,4 @@ # see section 2.7 of redhat guide to installing identity management assert: that: _freeipa_validate_hostname.stdout | split('.') | length >= 3 + fail_msg: "Hostname {{ _freeipa_validate_hostname.stdout }} is not fully-qualified (a.b.c)" From c5bf1dd62916921373cefcd68a80353aa3213c96 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Wed, 25 Jan 2023 10:13:30 +0000 Subject: [PATCH 27/99] define hostname as fqdn reliably (using userdata) and set fqdn hostvar --- .../inventory/group_vars/all/defaults.yml | 1 + .../terraform/nodes.tf | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/environments/common/inventory/group_vars/all/defaults.yml b/environments/common/inventory/group_vars/all/defaults.yml index 5fabe40bb..012eed560 100644 --- a/environments/common/inventory/group_vars/all/defaults.yml +++ b/environments/common/inventory/group_vars/all/defaults.yml @@ -1,6 +1,7 @@ --- # Miscellaneous ansible_user: rocky +fqdn: "{{ [inventory_hostname, openhpc_cluster_name, tld] | join('.') }}" # works even without facts unlike ansible_fqdn appliances_repository_root: "{{ lookup('env', 
'APPLIANCES_REPO_ROOT') }}" appliances_environment_root: "{{ lookup('env', 'APPLIANCES_ENVIRONMENT_ROOT') }}" #appliances_state_dir: # define an absolute path here to use for persistent state: NB: This is defined as /var/lib/state in inventory by the default Terraform diff --git a/environments/skeleton/{{cookiecutter.environment}}/terraform/nodes.tf b/environments/skeleton/{{cookiecutter.environment}}/terraform/nodes.tf index 2de921d69..6f5d6b2d7 100644 --- a/environments/skeleton/{{cookiecutter.environment}}/terraform/nodes.tf +++ b/environments/skeleton/{{cookiecutter.environment}}/terraform/nodes.tf @@ -111,6 +111,10 @@ resource "openstack_compute_instance_v2" "control" { user_data = <<-EOF #cloud-config + hostname: control + fqdn: control.${var.cluster_name}.${var.tld} + prefer_fqdn_over_hostname: true + fs_setup: - label: state filesystem: ext4 @@ -152,6 +156,13 @@ resource "openstack_compute_instance_v2" "login" { environment_root = var.environment_root } + user_data = <<-EOF + #cloud-config + hostname: ${each.key} + fqdn: ${each.key}.${var.cluster_name}.${var.tld} + prefer_fqdn_over_hostname: true + EOF + lifecycle{ ignore_changes = [ image_name, @@ -178,6 +189,13 @@ resource "openstack_compute_instance_v2" "compute" { environment_root = var.environment_root } + user_data = <<-EOF + #cloud-config + hostname: ${each.key} + fqdn: ${each.key}.${var.cluster_name}.${var.tld} + prefer_fqdn_over_hostname: true + EOF + lifecycle{ ignore_changes = [ image_name, From a6cd6f592aa0375fb9bf871c4b21621dd3989527 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Wed, 25 Jan 2023 10:05:30 +0000 Subject: [PATCH 28/99] add resolv_conf and proxy roles --- ansible/bootstrap.yml | 29 ++++----- ansible/roles/proxy/README.md | 11 ++++ ansible/roles/proxy/defaults/main.yml | 5 ++ ansible/roles/proxy/tasks/main.yml | 59 +++++++++++++++++++ ansible/roles/resolv_conf/README.md | 8 +++ ansible/roles/resolv_conf/defaults/main.yml | 1 + .../files/NetworkManager-dns-none.conf.j2 | 2 + 
ansible/roles/resolv_conf/tasks/main.yml | 30 ++++++++++ .../resolv_conf/templates/resolv.conf.j2 | 6 ++ .../inventory/group_vars/all/defaults.yml | 1 - environments/common/inventory/groups | 9 ++- environments/common/layouts/everything | 6 ++ 12 files changed, 145 insertions(+), 22 deletions(-) create mode 100644 ansible/roles/proxy/README.md create mode 100644 ansible/roles/proxy/defaults/main.yml create mode 100644 ansible/roles/proxy/tasks/main.yml create mode 100644 ansible/roles/resolv_conf/README.md create mode 100644 ansible/roles/resolv_conf/defaults/main.yml create mode 100644 ansible/roles/resolv_conf/files/NetworkManager-dns-none.conf.j2 create mode 100644 ansible/roles/resolv_conf/tasks/main.yml create mode 100644 ansible/roles/resolv_conf/templates/resolv.conf.j2 diff --git a/ansible/bootstrap.yml b/ansible/bootstrap.yml index c4cc853ce..c883358fa 100644 --- a/ansible/bootstrap.yml +++ b/ansible/bootstrap.yml @@ -13,25 +13,14 @@ to update these variable names. ** NB: The actual secrets will not be changed.** when: "'secrets_openhpc_' in (hostvars[inventory_hostname] | join)" -- hosts: all +- name: Write /etc/resolv/conf + hosts: resolv_conf + become: yes gather_facts: false tags: resolv_conf - become: yes tasks: - - name: Set nameservers in /etc/resolv.conf - copy: - content: | - # Created by slurm appliance ansible/bootstrap.yml - search {{ openhpc_cluster_name }}.{{ tld }} - - {% for ns in appliances_nameservers[0:3] %} - nameserver {{ ns }} - {% endfor %} - dest: /etc/resolv.conf - owner: root - group: root - mode: u=rw,og=r - when: appliances_nameservers | length > 0 + - import_role: + name: resolv_conf - hosts: etc_hosts gather_facts: false @@ -41,6 +30,14 @@ - import_role: name: etc_hosts +- hosts: proxy + gather_facts: false + tags: proxy + become: yes + tasks: + - import_role: + name: proxy + - hosts: cluster gather_facts: false tasks: diff --git a/ansible/roles/proxy/README.md b/ansible/roles/proxy/README.md new file mode 100644 index 
000000000..9af2c5e40 --- /dev/null +++ b/ansible/roles/proxy/README.md @@ -0,0 +1,11 @@ +# proxy + +Define http/s proxy configuration. + +## Role variables + +- `proxy_http_proxy`: Required. Address of http proxy. E.g. "http://10.1.0.28:3128" for a Squid proxy on default port. +- `proxy_https_proxy`: Optional. Address of https proxy. Default is `{{ proxy_http_proxy }}`. +- `proxy_no_proxy`: Optional. Comma-separated list of addresses not to proxy. Default is to concatenate `inventory_hostname` and `fqdn` for all Ansible hosts. +- `proxy_dnf`: Optional bool. Whether to configure yum/dnf proxying through `proxy_http_proxy`. Default `true`. +- `proxy_systemd`: Optional bool. Whether to give processes started by systemd the above http, https and no_proxy configuration. **NB** Running services will need restarting if this is changed. Default `true`. diff --git a/ansible/roles/proxy/defaults/main.yml b/ansible/roles/proxy/defaults/main.yml new file mode 100644 index 000000000..89be5e2eb --- /dev/null +++ b/ansible/roles/proxy/defaults/main.yml @@ -0,0 +1,5 @@ +# proxy_http_proxy: +proxy_https_proxy: "{{ proxy_http_proxy }}" +proxy_no_proxy: "{{ (groups['all'] + hostvars.values() | map(attribute='fqdn')) | sort | join(',') }}" +proxy_dnf: true +proxy_systemd: true diff --git a/ansible/roles/proxy/tasks/main.yml b/ansible/roles/proxy/tasks/main.yml new file mode 100644 index 000000000..3a6c74ee5 --- /dev/null +++ b/ansible/roles/proxy/tasks/main.yml @@ -0,0 +1,59 @@ +- name: Define configuration in /etc/environment + tags: proxy + lineinfile: + path: "/etc/environment" + create: yes + owner: root + group: root + mode: o=rw,go=r + state: present + regexp: "{{ item.key }}=.*" + line: "{{ item.key }}={{ item.value }}" + loop: + - key: http_proxy + value: "{{ proxy_http_proxy }}" + - key: https_proxy + value: "{{ proxy_https_proxy }}" + - key: no_proxy + value: "{{ proxy_no_proxy }}" + +- name: Define dnf/yum proxy + ini_file: + path: /etc/yum.conf + section: main + option: 
"proxy" + value: "{{ http_proxy }}" + no_extra_spaces: true + owner: root + group: root + mode: o=rw,go=r + when: proxy_dnf | bool + +- name: Create systemd configuration directory + file: + path: /etc/systemd/system.conf.d/ + state: directory + owner: root + group: root + mode: ug=rw,o=rX + when: proxy_systemd | bool + +- name: Define proxy configuration for systemd units + community.general.ini_file: + path: /etc/systemd/system.conf.d/90-proxy.conf + section: Manager + option: DefaultEnvironment + value: > + "http_proxy={{ proxy_http_proxy }}" "https_proxy={{ proxy_http_proxy }}" "no_proxy={{ proxy_no_proxy }}" + no_extra_spaces: true + owner: root + group: root + mode: ug=rw,o=r + register: _copy_systemd_proxy + when: proxy_systemd | bool + +- name: Restart systemd + command: systemctl daemon-reexec + when: + - proxy_systemd | bool + - _copy_systemd_proxy.changed | default(false) diff --git a/ansible/roles/resolv_conf/README.md b/ansible/roles/resolv_conf/README.md new file mode 100644 index 000000000..4563b6931 --- /dev/null +++ b/ansible/roles/resolv_conf/README.md @@ -0,0 +1,8 @@ +# resolv_conf + +Template out `/etc/resolv.conf`. If used, NetworkManager will be prevented from rewriting this file on boot. + +## Role variables +- `resolv_conf_nameservers`: List of up to 3 nameserver addresses. + +Note if `/etc/resolv.conf` includes `127.0.0.1` (e.g. 
due to a FreeIPA server installation), then `resolv_conf_nameservers` is ignored and this role does not change `/etc/resolv.conf` diff --git a/ansible/roles/resolv_conf/defaults/main.yml b/ansible/roles/resolv_conf/defaults/main.yml new file mode 100644 index 000000000..37c97b786 --- /dev/null +++ b/ansible/roles/resolv_conf/defaults/main.yml @@ -0,0 +1 @@ +resolv_conf_nameservers: [] diff --git a/ansible/roles/resolv_conf/files/NetworkManager-dns-none.conf.j2 b/ansible/roles/resolv_conf/files/NetworkManager-dns-none.conf.j2 new file mode 100644 index 000000000..d435aba99 --- /dev/null +++ b/ansible/roles/resolv_conf/files/NetworkManager-dns-none.conf.j2 @@ -0,0 +1,2 @@ +[main] +dns=none diff --git a/ansible/roles/resolv_conf/tasks/main.yml b/ansible/roles/resolv_conf/tasks/main.yml new file mode 100644 index 000000000..6abbacc81 --- /dev/null +++ b/ansible/roles/resolv_conf/tasks/main.yml @@ -0,0 +1,30 @@ +- name: Read nameservers from /etc/resolv.conf + ansible.builtin.slurp: + src: /etc/resolv.conf + register: _slurp_resolv_conf + +- name: Set nameservers in /etc/resolv.conf + # Might need to set this for freeipa_server host, but freeipa server install + # will then change it to point to 127.0.0.1. 
+ ansible.builtin.template: + src: resolv.conf.j2 + dest: /etc/resolv.conf + owner: root + group: root + mode: u=rw,og=r + when: "'127.0.0.1' not in (_slurp_resolv_conf.content | b64decode)" + +- name: Disable NetworkManager control of resolv.conf + ansible.builtin.copy: + src: NetworkManager-dns-none.conf.j2 + dest: /etc/NetworkManager/conf.d/90-dns-none.conf + owner: root + group: root + mode: u=rw,og=r + register: _copy_nm_config + +- name: Reload NetworkManager + ansible.builtin.systemd: + name: NetworkManager + state: reloaded + when: _copy_nm_config.changed | default(false) diff --git a/ansible/roles/resolv_conf/templates/resolv.conf.j2 b/ansible/roles/resolv_conf/templates/resolv.conf.j2 new file mode 100644 index 000000000..59c2c000f --- /dev/null +++ b/ansible/roles/resolv_conf/templates/resolv.conf.j2 @@ -0,0 +1,6 @@ +# Created by slurm appliance ansible/roles/resolv_conf +search {{ openhpc_cluster_name }}.{{ tld }} + +{% for ns in resolv_conf_nameservers[0:3] %} +nameserver {{ ns }} +{% endfor %} diff --git a/environments/common/inventory/group_vars/all/defaults.yml b/environments/common/inventory/group_vars/all/defaults.yml index 012eed560..046b2f985 100644 --- a/environments/common/inventory/group_vars/all/defaults.yml +++ b/environments/common/inventory/group_vars/all/defaults.yml @@ -5,7 +5,6 @@ fqdn: "{{ [inventory_hostname, openhpc_cluster_name, tld] | join('.') }}" # wor appliances_repository_root: "{{ lookup('env', 'APPLIANCES_REPO_ROOT') }}" appliances_environment_root: "{{ lookup('env', 'APPLIANCES_ENVIRONMENT_ROOT') }}" #appliances_state_dir: # define an absolute path here to use for persistent state: NB: This is defined as /var/lib/state in inventory by the default Terraform -appliances_nameservers: [] # if non-empty, supply up to 3 nameserver addresses to replace content of /etc/resolv.conf # Address(ip/dns) for internal communication between services. This is # normally traffic you do no want to expose to users. 
diff --git a/environments/common/inventory/groups b/environments/common/inventory/groups index 6693c0b77..22684c49f 100644 --- a/environments/common/inventory/groups +++ b/environments/common/inventory/groups @@ -94,10 +94,6 @@ fail2ban [etc_hosts] # Hosts to manage /etc/hosts e.g. if no internal DNS. See ansible/roles/etc_hosts/README.md -[cloud_init:children] -# Hosts to template out cloud_init data for -etc_hosts - [systemd:children] # Hosts to make systemd unit adjustments on opendistro @@ -117,4 +113,7 @@ freeipa_server freeipa_client [resolv_conf] -# Allows defining nameservers in /etc/resolv.conf - see ansible/bootstrap.yml +# Allows defining nameservers in /etc/resolv.conf - see ansible/roles/resolv_conf/README.md + +[proxy] +# Hosts to configure http/s proxies - see ansible/roles/proxy/README.md diff --git a/environments/common/layouts/everything b/environments/common/layouts/everything index 6e2538cc4..68a3270ce 100644 --- a/environments/common/layouts/everything +++ b/environments/common/layouts/everything @@ -54,3 +54,9 @@ compute [etc_hosts] # Hosts to manage /etc/hosts e.g. if no internal DNS. 
See ansible/roles/etc_hosts/README.md + +[resolv_conf] +# Allows defining nameservers in /etc/resolv.conf - see ansible/roles/resolv_conf/README.md + +[proxy] +# Hosts to configure http/s proxies - see ansible/roles/proxy/README.md From 05dc9546ad38b0887c69078cdd200cb51cc3a527 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Wed, 25 Jan 2023 10:56:29 +0000 Subject: [PATCH 29/99] add missing gitignore changes for proxy/resolv_conf --- ansible/.gitignore | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ansible/.gitignore b/ansible/.gitignore index 8301a5cea..59da9be58 100644 --- a/ansible/.gitignore +++ b/ansible/.gitignore @@ -36,3 +36,7 @@ roles/* !roles/systemd/** !roles/freeipa/ !roles/freeipa/** +!roles/proxy/ +!roles/proxy/** +!roles/resolv_conf/ +!roles/resolv_conf/** From bd5868aff45b68dd27062d35815a5c4eac2a9126 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Wed, 25 Jan 2023 12:16:57 +0000 Subject: [PATCH 30/99] fix dnf proxy --- ansible/roles/proxy/tasks/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/proxy/tasks/main.yml b/ansible/roles/proxy/tasks/main.yml index 3a6c74ee5..26f05d58d 100644 --- a/ansible/roles/proxy/tasks/main.yml +++ b/ansible/roles/proxy/tasks/main.yml @@ -22,7 +22,7 @@ path: /etc/yum.conf section: main option: "proxy" - value: "{{ http_proxy }}" + value: "{{ proxy_http_proxy }}" no_extra_spaces: true owner: root group: root From 0f987261c98ef687504189150bb8e16e4e7db950 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Tue, 24 Jan 2023 22:29:04 +0000 Subject: [PATCH 31/99] move freeipa host and user tasks to iam.yml --- ansible/bootstrap.yml | 14 +------------- ansible/iam.yml | 30 ++++++++++++++++++++++++------ 2 files changed, 25 insertions(+), 19 deletions(-) diff --git a/ansible/bootstrap.yml b/ansible/bootstrap.yml index c883358fa..f125d9ecb 100644 --- a/ansible/bootstrap.yml +++ b/ansible/bootstrap.yml @@ -84,7 +84,7 @@ register: sestatus - hosts: freeipa_server - # done here as 
it might be providing DNS + # Done here as it might be providing DNS tags: - freeipa - freeipa_server @@ -96,18 +96,6 @@ name: freeipa tasks_from: server.yml -- hosts: freeipa_client - tags: - - freeipa - - freeipa_server # as this is only relevant if using freeipa_server - gather_facts: no - become: yes - tasks: - - import_role: - name: freeipa - tasks_from: addhost.yml - when: groups['freeipa_server'] | length > 0 - # --- tasks after here require access to package repos --- - hosts: firewalld diff --git a/ansible/iam.yml b/ansible/iam.yml index 6e85f62ba..ee47e33fc 100644 --- a/ansible/iam.yml +++ b/ansible/iam.yml @@ -1,13 +1,16 @@ -- hosts: freeipa_server +- hosts: freeipa_client tags: - freeipa - - freeipa_server - gather_facts: yes + - freeipa_server # as this is only relevant if using freeipa_server + - freeipa_host + gather_facts: no become: yes tasks: - - import_role: + - name: Add freeipa hosts + import_role: name: freeipa - tasks_from: users.yml + tasks_from: addhost.yml + when: groups['freeipa_server'] | length > 0 - hosts: freeipa_client tags: @@ -16,14 +19,29 @@ gather_facts: yes become: yes tasks: - - import_role: + - name: Enrole freeipa clients + import_role: name: freeipa tasks_from: client.yml +- hosts: freeipa_server + tags: + - freeipa + - freeipa_server + - users + gather_facts: yes + become: yes + tasks: + - name: Add freeipa users + import_role: + name: freeipa + tasks_from: users.yml + - hosts: basic_users become: yes tags: - basic_users + - users gather_facts: yes tasks: - import_role: From 4614cb34fcda7cae905b63146c16aeb3859cb86e Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Wed, 25 Jan 2023 10:55:43 +0000 Subject: [PATCH 32/99] rewrite freeipa role --- ansible/roles/freeipa/README.md | 19 ++++++++--------- ansible/roles/freeipa/tasks/addhost.yml | 27 +++++++++++++++++------- ansible/roles/freeipa/tasks/client.yml | 17 --------------- ansible/roles/freeipa/tasks/users.yml | 1 + ansible/roles/freeipa/tasks/validate.yml | 19 
++++++++--------- 5 files changed, 38 insertions(+), 45 deletions(-) diff --git a/ansible/roles/freeipa/README.md b/ansible/roles/freeipa/README.md index 7016d7959..3c665939b 100644 --- a/ansible/roles/freeipa/README.md +++ b/ansible/roles/freeipa/README.md @@ -1,18 +1,19 @@ # freeipa -Support FreeIPA in the appliance. In production use it is expected the FreeIPA server(s) will be external to the cluster, implying that hosts and users are managed outside the appliance. However for testing and development the role can also deploy a FreeIPA server, add hosts to it and manage users on that host. +Support FreeIPA in the appliance. In production use it is expected the FreeIPA server(s) will be external to the cluster, implying that hosts and users are managed outside the appliance. However for testing and development the role can also deploy a FreeIPA server, add hosts to it and manage users in FreeIPA. # FreeIPA Client ## Usage - Add hosts to the `freeipa_client` group and run (at a minimum) the `ansible/iam.yml` playbook. - Host names must match the domain name. By default (using the skeleton Terraform) hostnames are of the form `nodename.cluster_name.tld` where `cluster_name` and `tld` are Terraform variables. -- Hosts discover the FreeIPA server from DNS records. If using an external FreeIPA server and the default nameservers do not have these records, the external FreeIPA server could be used as the nameserver directly by setting `freeipa_setup_dns: true` and `freeipa_server_ip`. -- For production use (i.e. with an external FreeIPA server), a random one-time password (OTP) should be generated when adding hosts to FreeIPA (e.g. using `ipa host-add --random ...`). This password should be set as a hostvar `freeipa_host_password`. Initial host enrolment will use this OTP to enrole the host. After this it becomes irrelevant so it does not need to be committed to git. This approach means the appliance does not require the FreeIPA administrator password. 
-- The `control` host must define `appliances_state_dir` (on persistent storage). This is used to backup keytabs to allow FreeIPA clients to be renroled after e.g. reimaging. Note that: +- Hosts discover the FreeIPA server FQDN (and their own domain) from DNS records. If this is not automatically the case (e.g. when using the in-appliance FreeIPA development server) the `resolv_conf` role run by `ansible/bootstrap.yml` can be used to add the FreeIPA server as the nameserver. +- For production use with an external FreeIPA server, a random one-time password (OTP) must be generated when adding hosts to FreeIPA (e.g. using `ipa host-add --random ...`). This password should be set as a hostvar `freeipa_host_password`. Initial host enrolment will use this OTP to enrole the host. After this it becomes irrelevant so it does not need to be committed to git. This approach means the appliance does not require the FreeIPA administrator password. +- For development use with the in-appliance FreeIPA server, `freeipa_host_password` will be automatically generated in memory. +- The `control` host must define `appliances_state_dir` (on persistent storage). This is used to back-up keytabs to allow FreeIPA clients to automatically reenroll after e.g. reimaging. Note that: - This is implemented when using the skeleton Terraform; on the control node `appliances_state_dir` defaults to `/var/lib/state` which is mounted from a volume. - - Nodes are not re-enroled by a Slurm-driven reimage (see the [rebuild role's readme](../../collections/ansible_collections/stackhpc/slurm_openstack_tools/roles/rebuild/README.md)) as that does not run this role. + - Nodes are not re-enroled by a [Slurm-driven reimage](../../collections/ansible_collections/stackhpc/slurm_openstack_tools/roles/rebuild/README.md)) as that does not run this role. - If both a backed-up keytab and `freeipa_host_password` exist, the former is used. @@ -20,7 +21,6 @@ Support FreeIPA in the appliance. 
In production use it is expected the FreeIPA s - `freeipa_host_password`. Required for initial enrolment only, FreeIPA host password as described above. - `freeipa_setup_dns`: Optional, whether to use the FreeIPA server as the client's nameserver. Defaults to `true` when `freeipa_server` contains a host, otherwise `false`. -- `freeipa_server_ip`: IP address of FreeIPA server. Only required for client if `freeipa_setup_dns` is true. Default in common environment is `ansible_host` of `freeipa_server` host. See also use of `appliances_state_dir` on the control node as described above. @@ -28,12 +28,10 @@ See also use of `appliances_state_dir` on the control node as described above. As noted above this is only intended for development and testing. ## Usage -- Add a single host to the `freeipa_server` group. +- Add a single host to the `freeipa_server` group and run (at a minimum) the `ansible/bootstrap.yml` and `ansible/iam.yml` playbooks. - As well as configuring the FreeIPA server, the role will also: - - Automatically configure the FreeIPA server as the first nameserver for `freeipa_client` hosts. - Add ansible hosts in the group `freeipa_client` as FreeIPA hosts. - Optionally control users in FreeIPA - see `freeipa_users` below. -- The `server.yml` playbook should be run on the `freeipa_server` host and the `addhost.yml` task file on the `freeipa_client` hosts (but only when a host is in `freeipa_server`). See `bootstrap.yml` for examples. ## Role Variables for Server @@ -43,7 +41,8 @@ These role variables are only required when using `freeipa_server`: - `freeipa_realm`: Optional, name of realm. Default is `{{ openhpc_cluster_name | upper }}.INVALID` - `freeipa_domain`: Optional, name of domain. Default is lowercased `freeipa_realm`. - `freeipa_ds_password`: Optional, password to be used by the Directory Server for the Directory Manager user (`ipa-server-install --ds-password`). Default is generated in `environments/<environment>/inventory/group_vars/all/secrets.yml` - `freeipa_admin_password`: Optional, password for the IPA `admin` user. 
Default is generated as for `freeipa_ds_password`. -- `freeipa_server_ip`: Optional, IP address of freeipa_server host. Default is `ansible_host` of the `freeipa_server` host. +- `freeipa_generate_host_password`: Optional bool, whether to generate a password for each `freeipa_client` host. Should only be used on the first run, as generating a new password will unenrole enroled hosts. +- `freeipa_server_ip`: Optional, IP address of freeipa_server host. Default is `ansible_host` of the `freeipa_server` host. Default `false`. - `freeipa_setup_dns`: Optional bool, whether to configure the FreeIPA server as an integrated DNS server and define a zone and records. NB: This also controls whether `freeipa_client` hosts use the `freeipa_server` host for name resolution. Default `true` when `freeipa_server` contains a host. - `freeipa_client_ip`: Optional, IP address of FreeIPA client. Default is `ansible_host`. - `freeipa_users`: A list of dicts as per parameters for [community.general.ipa_user](https://docs.ansible.com/ansible/latest/collections/community/general/ipa_user_module.html). 
Note that: diff --git a/ansible/roles/freeipa/tasks/addhost.yml b/ansible/roles/freeipa/tasks/addhost.yml index 92544e318..e1cec0080 100644 --- a/ansible/roles/freeipa/tasks/addhost.yml +++ b/ansible/roles/freeipa/tasks/addhost.yml @@ -1,14 +1,23 @@ -- name: Ensure freeipa_server_ip is first in freeipa_client host's resolv.conf - lineinfile: - path: /etc/resolv.conf - line: nameserver {{ freeipa_server_ip }} - firstmatch: true - insertbefore: '^nameserver' +- name: Get ipa host information + # This uses DNS to find the ipa server, which works as this is running on the enrolled ipa server + # It doesn't fail even if the host doesn't exist + community.general.ipa_host: + name: "{{ fqdn }}" + ip_address: "{{ freeipa_client_ip }}" + ipa_host: "{{ groups['freeipa_server'].0 }}" + ipa_pass: "{{ vault_freeipa_admin_password }}" + ipa_user: admin + state: present + validate_certs: false + delegate_to: "{{ groups['freeipa_server'].0 }}" + register: _ipa_host_check + check_mode: yes + changed_when: false - name: Add host to IPA - # annoyingly this always shows as changed! + # Using random_password=true this unenroles an enroled host, hence the check above community.general.ipa_host: - name: "{{ [inventory_hostname, openhpc_cluster_name, tld] | join('.') }}" # don't want to use ansible_ variables as might not have host in play e.g. 
during image build + name: "{{ fqdn }}" ip_address: "{{ freeipa_client_ip }}" ipa_host: "{{ groups['freeipa_server'].0 }}" ipa_pass: "{{ vault_freeipa_admin_password }}" @@ -17,8 +26,10 @@ state: present validate_certs: false delegate_to: "{{ groups['freeipa_server'].0 }}" + when: "'sshpubkeyfp' not in _ipa_host_check.host" register: _ipa_host_add - name: Set fact for ipa host password set_fact: freeipa_host_password: "{{ _ipa_host_add.host.randompassword }}" + when: _ipa_host_add.changed diff --git a/ansible/roles/freeipa/tasks/client.yml b/ansible/roles/freeipa/tasks/client.yml index c4c85379e..168401ef3 100644 --- a/ansible/roles/freeipa/tasks/client.yml +++ b/ansible/roles/freeipa/tasks/client.yml @@ -4,20 +4,6 @@ dnf: name: ipa-client -- name: Set IPA server as nameserver - copy: - dest: /etc/resolv.conf - content: | - ; Created by slurm appliance/ansible/roles/freeipa/tasks/client.yml - ; - - nameserver {{ freeipa_server_ip }} - nameserver 131.111.12.20 - owner: root - group: root - mode: u=rw,og=r - when: freeipa_setup_dns | bool - - name: Retrieve persisted keytab from previous enrolement slurp: src: "{{ _freeipa_keytab_backup_path }}" @@ -25,9 +11,6 @@ register: _slurp_persisted_keytab failed_when: false -- debug: - var: _slurp_persisted_keytab - - name: Write persisted keytab from previous enrolment copy: content: "{{ _slurp_persisted_keytab.content | b64decode }}" diff --git a/ansible/roles/freeipa/tasks/users.yml b/ansible/roles/freeipa/tasks/users.yml index b6f97d1f1..a6be8392c 100644 --- a/ansible/roles/freeipa/tasks/users.yml +++ b/ansible/roles/freeipa/tasks/users.yml @@ -1,3 +1,4 @@ - name: Add users to freeipa + # This uses DNS to find the ipa server, which works as this is running on the enrolled ipa server community.general.ipa_user: "{{ freeipa_user_defaults | combine(item) }}" loop: "{{ freeipa_users }}" diff --git a/ansible/roles/freeipa/tasks/validate.yml b/ansible/roles/freeipa/tasks/validate.yml index 2daf30c9a..02f13d18b 100644 --- 
a/ansible/roles/freeipa/tasks/validate.yml +++ b/ansible/roles/freeipa/tasks/validate.yml @@ -1,19 +1,18 @@ -- name: Ensure incompatible functionality is not enabled - assert: - that: groups[item] | length == 0 - fail_msg: "The group {{ item }} contains hosts - this cannot be used with freeipa enabled" - loop: - - basic_users - - etc_hosts - run_once: yes - - name: Get hostname as reported by command command: hostname register: _freeipa_validate_hostname changed_when: false + when: "'freeipa_server' in group_names" - name: Ensure hostname is fully-qualified # see section 2.7 of redhat guide to installing identity management assert: that: _freeipa_validate_hostname.stdout | split('.') | length >= 3 - fail_msg: "Hostname {{ _freeipa_validate_hostname.stdout }} is not fully-qualified (a.b.c)" + fail_msg: "FreeIPA server hostname '{{ _freeipa_validate_hostname.stdout }}' is not fully-qualified (a.b.c)" + when: "'freeipa_server' in group_names" + +- name: Ensure control node has persistent storage defined + assert: + that: "{{ 'appliances_state_dir' in hostvars['control'] }}" + fail_msg: "Variable appliances_state_dir must be defined on the control node (pointing to persistent storage) when using the freeipa role." 
+ run_once: true From 585823ad0404ebc8f610b20cfba62d13b633c9e1 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Tue, 31 Jan 2023 15:54:38 +0000 Subject: [PATCH 33/99] use new resolv_conf role on arcus to set nameservers --- environments/arcus/inventory/extra_groups | 3 +++ .../arcus/inventory/group_vars/resolv_conf/overrides.yml | 2 ++ 2 files changed, 5 insertions(+) create mode 100644 environments/arcus/inventory/group_vars/resolv_conf/overrides.yml diff --git a/environments/arcus/inventory/extra_groups b/environments/arcus/inventory/extra_groups index a688d8c9d..3f024a1c5 100644 --- a/environments/arcus/inventory/extra_groups +++ b/environments/arcus/inventory/extra_groups @@ -8,3 +8,6 @@ control [freeipa_client:children] login compute + +[resolv_conf:children] +freeipa_client diff --git a/environments/arcus/inventory/group_vars/resolv_conf/overrides.yml b/environments/arcus/inventory/group_vars/resolv_conf/overrides.yml new file mode 100644 index 000000000..668cd9d07 --- /dev/null +++ b/environments/arcus/inventory/group_vars/resolv_conf/overrides.yml @@ -0,0 +1,2 @@ +resolv_conf_nameservers: + - "{{ hostvars[groups['freeipa_server'].0].ansible_host }}" \ No newline at end of file From b9766e3358f17c9f4b760c5121a6e5b1a5a05085 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Wed, 1 Feb 2023 09:10:01 +0000 Subject: [PATCH 34/99] fix freeipa client fail when enroled but no keytab persisted --- ansible/roles/freeipa/tasks/client.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ansible/roles/freeipa/tasks/client.yml b/ansible/roles/freeipa/tasks/client.yml index 168401ef3..e49b43b36 100644 --- a/ansible/roles/freeipa/tasks/client.yml +++ b/ansible/roles/freeipa/tasks/client.yml @@ -51,7 +51,9 @@ --mkhomedir --enable-dns-updates --password '{{ freeipa_host_password }}' - when: '"content" not in _slurp_persisted_keytab' + when: + - '"content" not in _slurp_persisted_keytab' + - freeipa_host_password is defined register: 
ipa_client_install_password changed_when: ipa_client_install_password.rc == 0 failed_when: > From 05f5c960f1e3612425c57c519bcbf34e4c054758 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Wed, 1 Feb 2023 16:29:49 +0000 Subject: [PATCH 35/99] change fqdn->node_fqnd and tld->cluster_domain_suffix --- ansible/adhoc/rebuild.yml | 2 +- ansible/roles/etc_hosts/templates/hosts.j2 | 2 +- ansible/roles/freeipa/README.md | 4 +++- ansible/roles/freeipa/tasks/addhost.yml | 4 ++-- ansible/roles/proxy/README.md | 2 +- ansible/roles/proxy/defaults/main.yml | 2 +- .../roles/resolv_conf/templates/resolv.conf.j2 | 2 +- .../inventory/group_vars/all/defaults.yml | 2 +- .../terraform/inventory.tf | 2 +- .../terraform/inventory.tpl | 2 +- .../terraform/nodes.tf | 18 +++++++++--------- .../terraform/variables.tf | 6 +++--- 12 files changed, 25 insertions(+), 23 deletions(-) diff --git a/ansible/adhoc/rebuild.yml b/ansible/adhoc/rebuild.yml index e7e37633a..e9b170d9e 100644 --- a/ansible/adhoc/rebuild.yml +++ b/ansible/adhoc/rebuild.yml @@ -16,5 +16,5 @@ - command: "openstack server rebuild {{ instance_id | default(instance_name) }}{% if rebuild_image is defined %} --image {{ rebuild_image }}{% endif %}" delegate_to: localhost vars: - instance_name: "{{ [inventory_hostname, openhpc_cluster_name, tld] | join('.') if tld is defined else inventory_hostname }}" + instance_name: "{{ node_fqdn if cluster_domain_suffix is defined else inventory_hostname }}" - wait_for_connection: diff --git a/ansible/roles/etc_hosts/templates/hosts.j2 b/ansible/roles/etc_hosts/templates/hosts.j2 index 71bb47c0d..737abe5fe 100644 --- a/ansible/roles/etc_hosts/templates/hosts.j2 +++ b/ansible/roles/etc_hosts/templates/hosts.j2 @@ -2,5 +2,5 @@ ::1 localhost localhost.localdomain localhost6 localhost6.localdomain6 {% for inventory_hostname in groups['etc_hosts'] | sort -%} -{{ hostvars[inventory_hostname]['ansible_host'] }} {% if tld is defined %}{{ inventory_hostname }}.{{ openhpc_cluster_name }}.{{ tld }}{% 
endif %} {{ inventory_hostname }} +{{ hostvars[inventory_hostname]['ansible_host'] }} {% if cluster_domain_suffix is defined %}{{ node_fqdn }}{% endif %} {{ inventory_hostname }} {% endfor -%} diff --git a/ansible/roles/freeipa/README.md b/ansible/roles/freeipa/README.md index 3c665939b..502013946 100644 --- a/ansible/roles/freeipa/README.md +++ b/ansible/roles/freeipa/README.md @@ -7,7 +7,7 @@ Support FreeIPA in the appliance. In production use it is expected the FreeIPA s ## Usage - Add hosts to the `freeipa_client` group and run (at a minimum) the `ansible/iam.yml` playbook. -- Host names must match the domain name. By default (using the skeleton Terraform) hostnames are of the form `nodename.cluster_name.tld` where `cluster_name` and `tld` are Terraform variables. +- Host names must match the domain name. By default (using the skeleton Terraform) hostnames are of the form `nodename.cluster_name.cluster_domain_suffix` where `cluster_name` and `cluster_domain_suffix` are Terraform variables. - Hosts discover the FreeIPA server FQDN (and their own domain) from DNS records. If this is not automatically the case (e.g. when using the in-appliance FreeIPA development server) the `resolv_conf` role run by `ansible/bootstrap.yml` can be used to add the FreeIPA server as the nameserver. - For production use with an external FreeIPA server, a random one-time password (OTP) must be generated when adding hosts to FreeIPA (e.g. using `ipa host-add --random ...`). This password should be set as a hostvar `freeipa_host_password`. Initial host enrolment will use this OTP to enrole the host. After this it becomes irrelevant so it does not need to be committed to git. This approach means the appliance does not require the FreeIPA administrator password. - For development use with the in-appliance FreeIPA server, `freeipa_host_password` will be automatically generated in memory. @@ -33,6 +33,8 @@ As noted above this is only intended for development and testing. 
- Add ansible hosts in the group `freeipa_client` as FreeIPA hosts. - Optionally control users in FreeIPA - see `freeipa_users` below. +The FreeIPA GUI will be available on `https:///ipa/ui`. + ## Role Variables for Server These role variables are only required when using `freeipa_server`: diff --git a/ansible/roles/freeipa/tasks/addhost.yml b/ansible/roles/freeipa/tasks/addhost.yml index e1cec0080..cf1f4475a 100644 --- a/ansible/roles/freeipa/tasks/addhost.yml +++ b/ansible/roles/freeipa/tasks/addhost.yml @@ -2,7 +2,7 @@ # This uses DNS to find the ipa server, which works as this is running on the enrolled ipa server # It doesn't fail even if the host doesn't exist community.general.ipa_host: - name: "{{ fqdn }}" + name: "{{ node_fqdn }}" ip_address: "{{ freeipa_client_ip }}" ipa_host: "{{ groups['freeipa_server'].0 }}" ipa_pass: "{{ vault_freeipa_admin_password }}" @@ -17,7 +17,7 @@ - name: Add host to IPA # Using random_password=true this unenroles an enroled host, hence the check above community.general.ipa_host: - name: "{{ fqdn }}" + name: "{{ node_fqdn }}" ip_address: "{{ freeipa_client_ip }}" ipa_host: "{{ groups['freeipa_server'].0 }}" ipa_pass: "{{ vault_freeipa_admin_password }}" diff --git a/ansible/roles/proxy/README.md b/ansible/roles/proxy/README.md index 9af2c5e40..c8b561413 100644 --- a/ansible/roles/proxy/README.md +++ b/ansible/roles/proxy/README.md @@ -6,6 +6,6 @@ Define http/s proxy configuration. - `proxy_http_proxy`: Required. Address of http proxy. E.g. "http://10.1.0.28:3128" for a Squid proxy on default port. - `proxy_https_proxy`: Optional. Address of https proxy. Default is `{{ proxy_http_proxy }}`. -- `proxy_no_proxy`: Optional. Comma-separated list of addresses not to proxy. Default is to concatenate `inventory_hostname` and `fqdn` for all Ansible hosts. +- `proxy_no_proxy`: Optional. Comma-separated list of addresses not to proxy. Default is to concatenate `inventory_hostname` and `node_fqdn` for all Ansible hosts. 
- `proxy_dnf`: Optional bool. Whether to configure yum/dnf proxying through `proxy_http_proxy`. Default `true`. - `proxy_systemd`: Optional bool. Whether to give processes started by systemd the above http, https and no_proxy configuration. **NB** Running services will need restarting if this is changed. Default `true`. diff --git a/ansible/roles/proxy/defaults/main.yml b/ansible/roles/proxy/defaults/main.yml index 89be5e2eb..eeab877c7 100644 --- a/ansible/roles/proxy/defaults/main.yml +++ b/ansible/roles/proxy/defaults/main.yml @@ -1,5 +1,5 @@ # proxy_http_proxy: proxy_https_proxy: "{{ proxy_http_proxy }}" -proxy_no_proxy: "{{ (groups['all'] + hostvars.values() | map(attribute='fqdn')) | sort | join(',') }}" +proxy_no_proxy: "{{ (groups['all'] + hostvars.values() | map(attribute='node_fqdn')) | sort | join(',') }}" proxy_dnf: true proxy_systemd: true diff --git a/ansible/roles/resolv_conf/templates/resolv.conf.j2 b/ansible/roles/resolv_conf/templates/resolv.conf.j2 index 59c2c000f..320e5e5c9 100644 --- a/ansible/roles/resolv_conf/templates/resolv.conf.j2 +++ b/ansible/roles/resolv_conf/templates/resolv.conf.j2 @@ -1,5 +1,5 @@ # Created by slurm appliance ansible/roles/resolv_conf -search {{ openhpc_cluster_name }}.{{ tld }} +search {{ openhpc_cluster_name }}.{{ cluster_domain_suffix }} {% for ns in resolv_conf_nameservers[0:3] %} nameserver {{ ns }} diff --git a/environments/common/inventory/group_vars/all/defaults.yml b/environments/common/inventory/group_vars/all/defaults.yml index 046b2f985..e2ce8e693 100644 --- a/environments/common/inventory/group_vars/all/defaults.yml +++ b/environments/common/inventory/group_vars/all/defaults.yml @@ -1,7 +1,7 @@ --- # Miscellaneous ansible_user: rocky -fqdn: "{{ [inventory_hostname, openhpc_cluster_name, tld] | join('.') }}" # works even without facts unlike ansible_fqdn +node_fqdn: "{{ [inventory_hostname, openhpc_cluster_name, cluster_domain_suffix ] | join('.') }}" # works even without facts unlike ansible_fqdn 
appliances_repository_root: "{{ lookup('env', 'APPLIANCES_REPO_ROOT') }}" appliances_environment_root: "{{ lookup('env', 'APPLIANCES_ENVIRONMENT_ROOT') }}" #appliances_state_dir: # define an absolute path here to use for persistent state: NB: This is defined as /var/lib/state in inventory by the default Terraform diff --git a/environments/skeleton/{{cookiecutter.environment}}/terraform/inventory.tf b/environments/skeleton/{{cookiecutter.environment}}/terraform/inventory.tf index a93ca0989..dccdc35e0 100644 --- a/environments/skeleton/{{cookiecutter.environment}}/terraform/inventory.tf +++ b/environments/skeleton/{{cookiecutter.environment}}/terraform/inventory.tf @@ -2,7 +2,7 @@ resource "local_file" "hosts" { content = templatefile("${path.module}/inventory.tpl", { "cluster_name": var.cluster_name, - "tld": var.tld, + "cluster_domain_suffix": var.cluster_domain_suffix, "control": openstack_networking_port_v2.control, "state_dir": var.state_dir, "logins": openstack_networking_port_v2.login, diff --git a/environments/skeleton/{{cookiecutter.environment}}/terraform/inventory.tpl b/environments/skeleton/{{cookiecutter.environment}}/terraform/inventory.tpl index 4a5df21e2..fee52ce31 100644 --- a/environments/skeleton/{{cookiecutter.environment}}/terraform/inventory.tpl +++ b/environments/skeleton/{{cookiecutter.environment}}/terraform/inventory.tpl @@ -1,6 +1,6 @@ [all:vars] openhpc_cluster_name=${cluster_name} -tld=${tld} +cluster_domain_suffix=${cluster_domain_suffix} [control] ${split(".", control.name)[0]} ansible_host=${control.all_fixed_ips[0]} diff --git a/environments/skeleton/{{cookiecutter.environment}}/terraform/nodes.tf b/environments/skeleton/{{cookiecutter.environment}}/terraform/nodes.tf index 6f5d6b2d7..e78a01629 100644 --- a/environments/skeleton/{{cookiecutter.environment}}/terraform/nodes.tf +++ b/environments/skeleton/{{cookiecutter.environment}}/terraform/nodes.tf @@ -11,7 +11,7 @@ resource "openstack_networking_port_v2" "login" { for_each = 
toset(keys(var.login_nodes)) - name = "${each.key}.${var.cluster_name}.${var.tld}" + name = "${each.key}.${var.cluster_name}.${var.cluster_domain_suffix}" network_id = data.openstack_networking_network_v2.cluster_net.id admin_state_up = "true" @@ -29,7 +29,7 @@ resource "openstack_networking_port_v2" "login" { resource "openstack_networking_port_v2" "control" { - name = "control.${var.cluster_name}.${var.tld}" + name = "control.${var.cluster_name}.${var.cluster_domain_suffix}" network_id = data.openstack_networking_network_v2.cluster_net.id admin_state_up = "true" @@ -49,7 +49,7 @@ resource "openstack_networking_port_v2" "compute" { for_each = toset(keys(var.compute_nodes)) - name = "${each.key}.${var.cluster_name}.${var.tld}" + name = "${each.key}.${var.cluster_name}.${var.cluster_domain_suffix}" network_id = data.openstack_networking_network_v2.cluster_net.id admin_state_up = "true" @@ -70,7 +70,7 @@ resource "openstack_compute_instance_v2" "control" { for_each = var.create_nodes ? toset(["control"]) : toset([]) - name = "control.${var.cluster_name}.${var.tld}" + name = "control.${var.cluster_name}.${var.cluster_domain_suffix}" image_name = data.openstack_images_image_v2.control.name flavor_name = var.control_node.flavor key_pair = var.key_pair @@ -112,7 +112,7 @@ resource "openstack_compute_instance_v2" "control" { user_data = <<-EOF #cloud-config hostname: control - fqdn: control.${var.cluster_name}.${var.tld} + fqdn: control.${var.cluster_name}.${var.cluster_domain_suffix}" prefer_fqdn_over_hostname: true fs_setup: @@ -142,7 +142,7 @@ resource "openstack_compute_instance_v2" "login" { for_each = var.create_nodes ? 
var.login_nodes : {} - name = "${each.key}.${var.cluster_name}.${var.tld}" + name = "${each.key}.${var.cluster_name}.${var.cluster_domain_suffix}" image_name = each.value.image flavor_name = each.value.flavor key_pair = var.key_pair @@ -159,7 +159,7 @@ resource "openstack_compute_instance_v2" "login" { user_data = <<-EOF #cloud-config hostname: ${each.key} - fqdn: ${each.key}.${var.cluster_name}.${var.tld} + fqdn: ${each.key}.${var.cluster_name}.${var.cluster_domain_suffix} prefer_fqdn_over_hostname: true EOF @@ -175,7 +175,7 @@ resource "openstack_compute_instance_v2" "compute" { for_each = var.create_nodes ? var.compute_nodes : {} - name = "${each.key}.${var.cluster_name}.${var.tld}" + name = "${each.key}.${var.cluster_name}.${var.cluster_domain_suffix}" image_name = lookup(var.compute_images, each.key, var.compute_types[each.value].image) flavor_name = var.compute_types[each.value].flavor key_pair = var.key_pair @@ -192,7 +192,7 @@ resource "openstack_compute_instance_v2" "compute" { user_data = <<-EOF #cloud-config hostname: ${each.key} - fqdn: ${each.key}.${var.cluster_name}.${var.tld} + fqdn: ${each.key}.${var.cluster_name}.${var.cluster_domain_suffix} prefer_fqdn_over_hostname: true EOF diff --git a/environments/skeleton/{{cookiecutter.environment}}/terraform/variables.tf b/environments/skeleton/{{cookiecutter.environment}}/terraform/variables.tf index 1bb8088ec..aa1067008 100644 --- a/environments/skeleton/{{cookiecutter.environment}}/terraform/variables.tf +++ b/environments/skeleton/{{cookiecutter.environment}}/terraform/variables.tf @@ -1,11 +1,11 @@ variable "cluster_name" { type = string - description = "Name for cluster, used as prefix for resources" + description = "Name of cluster, used as part of domain name" } -variable "tld" { +variable "cluster_domain_suffix" { type = string - description = "Top level domain name" + description = "Domain suffix for cluster" default = "invalid" } From 9c4a0370f5a1095fded8886b28d6c8183af2a241 Mon Sep 17 00:00:00 
2001 From: Steve Brasier Date: Tue, 7 Feb 2023 15:22:11 +0000 Subject: [PATCH 36/99] make freeipa_server_ip safe to template --- .../common/inventory/group_vars/all/freeipa_server.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/environments/common/inventory/group_vars/all/freeipa_server.yml b/environments/common/inventory/group_vars/all/freeipa_server.yml index 53ab3964e..82b0d4ca5 100644 --- a/environments/common/inventory/group_vars/all/freeipa_server.yml +++ b/environments/common/inventory/group_vars/all/freeipa_server.yml @@ -3,4 +3,5 @@ freeipa_realm: "{{ openhpc_cluster_name | upper }}.INVALID" freeipa_ds_password: "{{ vault_freeipa_ds_password }}" freeipa_admin_password: "{{ vault_freeipa_admin_password }}" -freeipa_server_ip: "{{ hostvars[groups['freeipa_server'].0].ansible_host }}" # not using ansible_default_ipv4.address as that requires facts +# the below doesn't use ansible_default_ipv4.address as that requires facts, and allows for templating when group freeipa_server is empty +freeipa_server_ip: "{{ hostvars[groups['freeipa_server'].0].ansible_host if groups['freeipa_server'] else false }}" From 9d5051d74e919148812ba1c60edbf23eef4e7239 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Wed, 8 Feb 2023 15:26:35 +0000 Subject: [PATCH 37/99] fix /etc/hosts and realm when using cluster_domain_suffix --- ansible/roles/etc_hosts/templates/hosts.j2 | 2 +- environments/common/inventory/group_vars/all/freeipa_server.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ansible/roles/etc_hosts/templates/hosts.j2 b/ansible/roles/etc_hosts/templates/hosts.j2 index 737abe5fe..d82fb7808 100644 --- a/ansible/roles/etc_hosts/templates/hosts.j2 +++ b/ansible/roles/etc_hosts/templates/hosts.j2 @@ -2,5 +2,5 @@ ::1 localhost localhost.localdomain localhost6 localhost6.localdomain6 {% for inventory_hostname in groups['etc_hosts'] | sort -%} -{{ hostvars[inventory_hostname]['ansible_host'] }} {% if cluster_domain_suffix is defined 
%}{{ node_fqdn }}{% endif %} {{ inventory_hostname }} +{{ hostvars[inventory_hostname]['ansible_host'] }} {% if cluster_domain_suffix is defined %}{{ hostvars[inventory_hostname]['node_fqdn'] }}{% endif %} {{ inventory_hostname }} {% endfor -%} diff --git a/environments/common/inventory/group_vars/all/freeipa_server.yml b/environments/common/inventory/group_vars/all/freeipa_server.yml index 82b0d4ca5..7f0fee713 100644 --- a/environments/common/inventory/group_vars/all/freeipa_server.yml +++ b/environments/common/inventory/group_vars/all/freeipa_server.yml @@ -1,6 +1,6 @@ # See ansible/roles/freeipa/README.md # These vars are only used when freeipa_server is enabled. They are not required when enabling only freeipa_client -freeipa_realm: "{{ openhpc_cluster_name | upper }}.INVALID" +freeipa_realm: "{{ openhpc_cluster_name | upper }}.{{ cluster_domain_suffix | upper }}" freeipa_ds_password: "{{ vault_freeipa_ds_password }}" freeipa_admin_password: "{{ vault_freeipa_admin_password }}" # the below doesn't use ansible_default_ipv4.address as that requires facts, and allows for templating when group freeipa_server is empty From 975920235ec3fb2af6731e23e48f192afb7adbf3 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Tue, 11 Apr 2023 10:43:13 +0000 Subject: [PATCH 38/99] add adhoc to backup/restore keytabs to/from environment --- ansible/adhoc/backup-keytabs.yml | 11 +++++++++++ ansible/roles/freeipa/tasks/backup-keytabs.yml | 14 ++++++++++++++ 2 files changed, 25 insertions(+) create mode 100644 ansible/adhoc/backup-keytabs.yml create mode 100644 ansible/roles/freeipa/tasks/backup-keytabs.yml diff --git a/ansible/adhoc/backup-keytabs.yml b/ansible/adhoc/backup-keytabs.yml new file mode 100644 index 000000000..5566e48ac --- /dev/null +++ b/ansible/adhoc/backup-keytabs.yml @@ -0,0 +1,11 @@ +# Use ONE of the following tags on this playbook: +# - retrieve: copies keytabs out of the state volume to the environment +# - deploy: copies keytabs from the environment to the 
state volume + +- hosts: freeipa_client + become: yes + gather_facts: no + tasks: + - import_role: + name: freeipa + tasks_from: backup-keytabs.yml diff --git a/ansible/roles/freeipa/tasks/backup-keytabs.yml b/ansible/roles/freeipa/tasks/backup-keytabs.yml new file mode 100644 index 000000000..7fc77f9e1 --- /dev/null +++ b/ansible/roles/freeipa/tasks/backup-keytabs.yml @@ -0,0 +1,14 @@ +- name: Retrieve keytabs to localhost + fetch: + src: "{{ _freeipa_keytab_backup_path }}" + dest: "{{ appliances_environment_root }}/keytabs/{{ inventory_hostname }}/" + flat: true + delegate_to: "{{ groups['control'].0 }}" + tags: retrieve + +- name: Copy keytabs back to control node + copy: + src: "{{ appliances_environment_root }}/keytabs/{{ inventory_hostname }}/" + dest: "{{ _freeipa_keytab_backup_path | dirname }}" + delegate_to: "{{ groups['control'].0 }}" + tags: deploy From 029ef11c1cb470e6570be1514aa7157736de44c8 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Thu, 29 Jun 2023 16:21:42 +0000 Subject: [PATCH 39/99] move testuser setup to correct location --- .../group_vars/{basic_users/overrides.yml => all/test_user.yml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename environments/.stackhpc/inventory/group_vars/{basic_users/overrides.yml => all/test_user.yml} (100%) diff --git a/environments/.stackhpc/inventory/group_vars/basic_users/overrides.yml b/environments/.stackhpc/inventory/group_vars/all/test_user.yml similarity index 100% rename from environments/.stackhpc/inventory/group_vars/basic_users/overrides.yml rename to environments/.stackhpc/inventory/group_vars/all/test_user.yml From e0344136fe7a1389820e91076d5d7ff85a362fc1 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Thu, 29 Jun 2023 16:22:52 +0000 Subject: [PATCH 40/99] fix definition of control node for freeipa --- ansible/roles/freeipa/tasks/validate.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/freeipa/tasks/validate.yml 
b/ansible/roles/freeipa/tasks/validate.yml index 02f13d18b..a7806606a 100644 --- a/ansible/roles/freeipa/tasks/validate.yml +++ b/ansible/roles/freeipa/tasks/validate.yml @@ -13,6 +13,6 @@ - name: Ensure control node has persistent storage defined assert: - that: "{{ 'appliances_state_dir' in hostvars['control'] }}" + that: "{{ 'appliances_state_dir' in hostvars[groups['control'] | first ] }}" fail_msg: "Variable appliances_state_dir must be defined on the control node (pointing to persistent storage) when using the freeipa role." run_once: true From 6314134a56b7c39531e260aab96c5a27c12ef274 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Tue, 4 Jul 2023 09:35:02 +0000 Subject: [PATCH 41/99] still use clustername-each.key with FQDN hosts (for freeipa) --- .../terraform/inventory.tf | 7 +++-- .../terraform/inventory.tpl | 14 +++++----- .../terraform/nodes.tf | 26 +++++++------------ 3 files changed, 21 insertions(+), 26 deletions(-) diff --git a/environments/skeleton/{{cookiecutter.environment}}/terraform/inventory.tf b/environments/skeleton/{{cookiecutter.environment}}/terraform/inventory.tf index dccdc35e0..5f195caf2 100644 --- a/environments/skeleton/{{cookiecutter.environment}}/terraform/inventory.tf +++ b/environments/skeleton/{{cookiecutter.environment}}/terraform/inventory.tf @@ -3,13 +3,12 @@ resource "local_file" "hosts" { { "cluster_name": var.cluster_name, "cluster_domain_suffix": var.cluster_domain_suffix, - "control": openstack_networking_port_v2.control, + "control_instances": openstack_compute_instance_v2.control + "login_instances": openstack_compute_instance_v2.login + "compute_instances": openstack_compute_instance_v2.compute "state_dir": var.state_dir, - "logins": openstack_networking_port_v2.login, - "computes": openstack_networking_port_v2.compute, "compute_types": var.compute_types, "compute_nodes": var.compute_nodes, - "subnet": data.openstack_networking_subnet_v2.cluster_subnet, }, ) filename = "../inventory/hosts" diff --git 
a/environments/skeleton/{{cookiecutter.environment}}/terraform/inventory.tpl b/environments/skeleton/{{cookiecutter.environment}}/terraform/inventory.tpl index fee52ce31..7b4b406b5 100644 --- a/environments/skeleton/{{cookiecutter.environment}}/terraform/inventory.tpl +++ b/environments/skeleton/{{cookiecutter.environment}}/terraform/inventory.tpl @@ -3,20 +3,22 @@ openhpc_cluster_name=${cluster_name} cluster_domain_suffix=${cluster_domain_suffix} [control] -${split(".", control.name)[0]} ansible_host=${control.all_fixed_ips[0]} +%{ for control in control_instances ~} +${ control.name } ansible_host=${[for n in control.network: n.fixed_ip_v4 if n.access_network][0]} +%{ endfor ~} [control:vars] # NB needs to be set on group not host otherwise it is ignored in packer build! appliances_state_dir=${state_dir} [login] -%{ for login in logins ~} -${split(".", login.name)[0]} ansible_host=${login.all_fixed_ips[0]} +%{ for login in login_instances ~} +${ login.name } ansible_host=${[for n in login.network: n.fixed_ip_v4 if n.access_network][0]} %{ endfor ~} [compute] -%{ for compute in computes ~} -${split(".", compute.name)[0]} ansible_host=${compute.all_fixed_ips[0]} +%{ for compute in compute_instances ~} +${ compute.name } ansible_host=${[for n in compute.network: n.fixed_ip_v4 if n.access_network][0]} %{ endfor ~} # Define groups for slurm parititions: @@ -24,7 +26,7 @@ ${split(".", compute.name)[0]} ansible_host=${compute.all_fixed_ips[0]} [${cluster_name}_${type_name}] %{~ for node_name, node_type in compute_nodes ~} %{~ if node_type == type_name ~} -${split(".", computes[node_name].name)[0]} +${ compute_instances[node_name].name } %{~ endif ~} %{~ endfor ~} %{ endfor ~} diff --git a/environments/skeleton/{{cookiecutter.environment}}/terraform/nodes.tf b/environments/skeleton/{{cookiecutter.environment}}/terraform/nodes.tf index e78a01629..98a7e9c9c 100644 --- a/environments/skeleton/{{cookiecutter.environment}}/terraform/nodes.tf +++ 
b/environments/skeleton/{{cookiecutter.environment}}/terraform/nodes.tf @@ -11,7 +11,7 @@ resource "openstack_networking_port_v2" "login" { for_each = toset(keys(var.login_nodes)) - name = "${each.key}.${var.cluster_name}.${var.cluster_domain_suffix}" + name = "${var.cluster_name}-${each.key}" network_id = data.openstack_networking_network_v2.cluster_net.id admin_state_up = "true" @@ -29,7 +29,7 @@ resource "openstack_networking_port_v2" "login" { resource "openstack_networking_port_v2" "control" { - name = "control.${var.cluster_name}.${var.cluster_domain_suffix}" + name = "${var.cluster_name}-control" network_id = data.openstack_networking_network_v2.cluster_net.id admin_state_up = "true" @@ -49,7 +49,7 @@ resource "openstack_networking_port_v2" "compute" { for_each = toset(keys(var.compute_nodes)) - name = "${each.key}.${var.cluster_name}.${var.cluster_domain_suffix}" + name = "${var.cluster_name}-${each.key}" network_id = data.openstack_networking_network_v2.cluster_net.id admin_state_up = "true" @@ -70,7 +70,7 @@ resource "openstack_compute_instance_v2" "control" { for_each = var.create_nodes ? toset(["control"]) : toset([]) - name = "control.${var.cluster_name}.${var.cluster_domain_suffix}" + name = "${var.cluster_name}-${each.key}" image_name = data.openstack_images_image_v2.control.name flavor_name = var.control_node.flavor key_pair = var.key_pair @@ -111,10 +111,8 @@ resource "openstack_compute_instance_v2" "control" { user_data = <<-EOF #cloud-config - hostname: control - fqdn: control.${var.cluster_name}.${var.cluster_domain_suffix}" - prefer_fqdn_over_hostname: true - + fqdn: ${var.cluster_name}-${each.key}.${var.cluster_name}.${var.cluster_domain_suffix} + fs_setup: - label: state filesystem: ext4 @@ -142,7 +140,7 @@ resource "openstack_compute_instance_v2" "login" { for_each = var.create_nodes ? 
var.login_nodes : {} - name = "${each.key}.${var.cluster_name}.${var.cluster_domain_suffix}" + name = "${var.cluster_name}-${each.key}" image_name = each.value.image flavor_name = each.value.flavor key_pair = var.key_pair @@ -158,9 +156,7 @@ resource "openstack_compute_instance_v2" "login" { user_data = <<-EOF #cloud-config - hostname: ${each.key} - fqdn: ${each.key}.${var.cluster_name}.${var.cluster_domain_suffix} - prefer_fqdn_over_hostname: true + fqdn: ${var.cluster_name}-${each.key}.${var.cluster_name}.${var.cluster_domain_suffix} EOF lifecycle{ @@ -175,7 +171,7 @@ resource "openstack_compute_instance_v2" "compute" { for_each = var.create_nodes ? var.compute_nodes : {} - name = "${each.key}.${var.cluster_name}.${var.cluster_domain_suffix}" + name = "${var.cluster_name}-${each.key}" image_name = lookup(var.compute_images, each.key, var.compute_types[each.value].image) flavor_name = var.compute_types[each.value].flavor key_pair = var.key_pair @@ -191,9 +187,7 @@ resource "openstack_compute_instance_v2" "compute" { user_data = <<-EOF #cloud-config - hostname: ${each.key} - fqdn: ${each.key}.${var.cluster_name}.${var.cluster_domain_suffix} - prefer_fqdn_over_hostname: true + fqdn: ${var.cluster_name}-${each.key}.${var.cluster_name}.${var.cluster_domain_suffix} EOF lifecycle{ From ca45ee261fd92cdaeb1cf687411112925581aa82 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Tue, 4 Jul 2023 09:35:57 +0000 Subject: [PATCH 42/99] fix etc_hosts template when using cluster_domain_suffix --- ansible/roles/etc_hosts/defaults/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/etc_hosts/defaults/main.yml b/ansible/roles/etc_hosts/defaults/main.yml index b50412466..48df2b9f7 100644 --- a/ansible/roles/etc_hosts/defaults/main.yml +++ b/ansible/roles/etc_hosts/defaults/main.yml @@ -1,3 +1,3 @@ etc_hosts_template: hosts.j2 -etc_hosts_hostvars: "{{ ['ansible_host'] + ([node_fqdn] if cluster_domain_suffix is defined else []) + 
['inventory_hostname'] }}" +etc_hosts_hostvars: "{{ ['ansible_host'] + (['node_fqdn'] if cluster_domain_suffix is defined else []) + ['inventory_hostname'] }}" etc_hosts_extra_hosts: '' From c086c9e3efbd9d570e1a629a48c6448b919e60f5 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Tue, 4 Jul 2023 09:36:46 +0000 Subject: [PATCH 43/99] revert rebuild adhoc now inventory_hostname isn't changed with fqdn hosts --- ansible/adhoc/rebuild.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/ansible/adhoc/rebuild.yml b/ansible/adhoc/rebuild.yml index e9b170d9e..c30737fd6 100644 --- a/ansible/adhoc/rebuild.yml +++ b/ansible/adhoc/rebuild.yml @@ -13,8 +13,6 @@ become: no gather_facts: no tasks: - - command: "openstack server rebuild {{ instance_id | default(instance_name) }}{% if rebuild_image is defined %} --image {{ rebuild_image }}{% endif %}" + - command: "openstack server rebuild {{ instance_id | default(inventory_hostname) }}{% if rebuild_image is defined %} --image {{ rebuild_image }}{% endif %}" delegate_to: localhost - vars: - instance_name: "{{ node_fqdn if cluster_domain_suffix is defined else inventory_hostname }}" - wait_for_connection: From 78b069c223a4d5610da077fc7e5bb5f594d725a7 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Tue, 4 Jul 2023 09:37:02 +0000 Subject: [PATCH 44/99] Revert "move freeipa before filesystems so nfs clients can find server" This reverts commit 6ee0c12d11c007be4a034bcbf7034a9618a105df. 
--- ansible/site.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/site.yml b/ansible/site.yml index fd564367f..37befa547 100644 --- a/ansible/site.yml +++ b/ansible/site.yml @@ -17,12 +17,12 @@ import_playbook: "{{ hook_path if hook_path | exists else 'noop.yml' }}" when: hook_path | exists -- import_playbook: iam.yml - import_playbook: filesystems.yml - import_playbook: extras.yml - import_playbook: slurm.yml - import_playbook: portal.yml - import_playbook: monitoring.yml +- import_playbook: iam.yml - name: Run post.yml hook vars: From 78a213e09dd052c5b751dec659f4ca9db06c6f3a Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Tue, 4 Jul 2023 09:38:34 +0000 Subject: [PATCH 45/99] move basic_users to after filesystems.yml so templating is done onto *shared* home --- ansible/extras.yml | 10 ++++++++++ ansible/iam.yml | 10 ---------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/ansible/extras.yml b/ansible/extras.yml index 28dbb46cb..63a3b9eb8 100644 --- a/ansible/extras.yml +++ b/ansible/extras.yml @@ -1,3 +1,13 @@ +- hosts: basic_users + become: yes + tags: + - basic_users + - users + gather_facts: yes + tasks: + - import_role: + name: basic_users + - hosts: cuda become: yes gather_facts: no diff --git a/ansible/iam.yml b/ansible/iam.yml index ee47e33fc..655fc3f73 100644 --- a/ansible/iam.yml +++ b/ansible/iam.yml @@ -36,13 +36,3 @@ import_role: name: freeipa tasks_from: users.yml - -- hosts: basic_users - become: yes - tags: - - basic_users - - users - gather_facts: yes - tasks: - - import_role: - name: basic_users From eba82fd9c70ce2711ef026628899df0f56afa526 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Tue, 4 Jul 2023 09:39:07 +0000 Subject: [PATCH 46/99] move EEESI to extras --- ansible/bootstrap.yml | 10 ---------- ansible/extras.yml | 10 ++++++++++ 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/ansible/bootstrap.yml b/ansible/bootstrap.yml index f765faf85..1f7356fd6 100644 --- 
a/ansible/bootstrap.yml +++ b/ansible/bootstrap.yml @@ -127,16 +127,6 @@ tasks_from: config.yml tags: config -- name: Setup EESSI - hosts: eessi - tags: eessi - become: true - gather_facts: false - tasks: - - name: Install and configure EESSI - import_role: - name: eessi - - hosts: update gather_facts: false become: yes diff --git a/ansible/extras.yml b/ansible/extras.yml index 63a3b9eb8..89583a34f 100644 --- a/ansible/extras.yml +++ b/ansible/extras.yml @@ -8,6 +8,16 @@ - import_role: name: basic_users +- name: Setup EESSI + hosts: eessi + tags: eessi + become: true + gather_facts: false + tasks: + - name: Install and configure EESSI + import_role: + name: eessi + - hosts: cuda become: yes gather_facts: no From 08806101b7273a7a44f7f793592afb15adb36584 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Tue, 4 Jul 2023 09:59:57 +0000 Subject: [PATCH 47/99] remove duplicate resolv_conf role file --- ansible/roles/resolv_conf/files/NetworkManager-dns-none.conf.j2 | 2 -- 1 file changed, 2 deletions(-) delete mode 100644 ansible/roles/resolv_conf/files/NetworkManager-dns-none.conf.j2 diff --git a/ansible/roles/resolv_conf/files/NetworkManager-dns-none.conf.j2 b/ansible/roles/resolv_conf/files/NetworkManager-dns-none.conf.j2 deleted file mode 100644 index d435aba99..000000000 --- a/ansible/roles/resolv_conf/files/NetworkManager-dns-none.conf.j2 +++ /dev/null @@ -1,2 +0,0 @@ -[main] -dns=none From 7aa6230eb78958403f4088cdd438a57aba99e256 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Tue, 4 Jul 2023 10:59:05 +0000 Subject: [PATCH 48/99] Revert "Revert "move freeipa before filesystems so nfs clients can find server"" This reverts commit 78b069c223a4d5610da077fc7e5bb5f594d725a7. 
--- ansible/site.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/site.yml b/ansible/site.yml index 37befa547..fd564367f 100644 --- a/ansible/site.yml +++ b/ansible/site.yml @@ -17,12 +17,12 @@ import_playbook: "{{ hook_path if hook_path | exists else 'noop.yml' }}" when: hook_path | exists +- import_playbook: iam.yml - import_playbook: filesystems.yml - import_playbook: extras.yml - import_playbook: slurm.yml - import_playbook: portal.yml - import_playbook: monitoring.yml -- import_playbook: iam.yml - name: Run post.yml hook vars: From dc926f2cbdb008645d687ded3c44baae80fc254d Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Tue, 4 Jul 2023 11:40:58 +0000 Subject: [PATCH 49/99] update freeipa readme --- ansible/roles/freeipa/README.md | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/ansible/roles/freeipa/README.md b/ansible/roles/freeipa/README.md index 502013946..2cc27095c 100644 --- a/ansible/roles/freeipa/README.md +++ b/ansible/roles/freeipa/README.md @@ -1,19 +1,35 @@ # freeipa -Support FreeIPA in the appliance. In production use it is expected the FreeIPA server(s) will be external to the cluster, implying that hosts and users are managed outside the appliance. However for testing and development the role can also deploy a FreeIPA server, add hosts to it and manage users in FreeIPA. +Support FreeIPA in the appliance. In production use it is expected the FreeIPA server(s) will be external to the cluster, implying that hosts and users are managed outside the appliance. However for testing and development the role can also deploy an "in-appliance" FreeIPA server, add hosts to it and manage users in FreeIPA. # FreeIPA Client ## Usage - Add hosts to the `freeipa_client` group and run (at a minimum) the `ansible/iam.yml` playbook. - Host names must match the domain name. 
By default (using the skeleton Terraform) hostnames are of the form `nodename.cluster_name.cluster_domain_suffix` where `cluster_name` and `cluster_domain_suffix` are Terraform variables. -- Hosts discover the FreeIPA server FQDN (and their own domain) from DNS records. If this is not automatically the case (e.g. when using the in-appliance FreeIPA development server) the `resolv_conf` role run by `ansible/bootstrap.yml` can be used to add the FreeIPA server as the nameserver. +- Hosts discover the FreeIPA server FQDN (and their own domain) from DNS records. If this is not set from DHCP, then use the `resolv_conf` role to configure this. For example when using the in-appliance FreeIPA development server,: + + ```ini + # environments//groups + ... + [resolv_conf:children] + freeipa_client + ... + ``` + + ```yaml + # environments//inventory/group_vars/all/resolv_conf.yml + resolv_conf_nameservers: + - "{{ hostvars[groups['freeipa_server'] | first].ansible_host }}" + ``` + + - For production use with an external FreeIPA server, a random one-time password (OTP) must be generated when adding hosts to FreeIPA (e.g. using `ipa host-add --random ...`). This password should be set as a hostvar `freeipa_host_password`. Initial host enrolment will use this OTP to enrole the host. After this it becomes irrelevant so it does not need to be committed to git. This approach means the appliance does not require the FreeIPA administrator password. - For development use with the in-appliance FreeIPA server, `freeipa_host_password` will be automatically generated in memory. - The `control` host must define `appliances_state_dir` (on persistent storage). This is used to back-up keytabs to allow FreeIPA clients to automatically reenroll after e.g. reimaging. Note that: - This is implemented when using the skeleton Terraform; on the control node `appliances_state_dir` defaults to `/var/lib/state` which is mounted from a volume. 
- - Nodes are not re-enroled by a [Slurm-driven reimage](../../collections/ansible_collections/stackhpc/slurm_openstack_tools/roles/rebuild/README.md)) as that does not run this role. + - Nodes are not re-enroled by a [Slurm-driven reimage](../../collections/ansible_collections/stackhpc/slurm_openstack_tools/roles/rebuild/README.md) (as that does not run this role). - If both a backed-up keytab and `freeipa_host_password` exist, the former is used. @@ -43,7 +59,6 @@ These role variables are only required when using `freeipa_server`: - `freeipa_domain`: Optional, name of domain. Default is lowercased `freeipa_realm`. - `freeipa_ds_password`: Optional, password to be used by the Directory Server for the Directory Manager user (`ipa-server-install --ds-password`). Default is generated in `environments//inventory/group_vars/all/secrets.yml` - `freeipa_admin_password`: Optional, password for the IPA `admin` user. Default is generated as for `freeipa_ds_password`. -- `freeipa_generate_host_password`: Optional bool, whether to generate a password for each `freeipa_client` host. Should only be used on the first run, as generating a new password will unenrole enroled hosts. - `freeipa_server_ip`: Optional, IP address of freeipa_server host. Default is `ansible_host` of the `freeipa_server` host. Default `false`. - `freeipa_setup_dns`: Optional bool, whether to configure the FreeIPA server as an integrated DNS server and define a zone and records. NB: This also controls whether `freeipa_client` hosts use the `freeipa_server` host for name resolution. Default `true` when `freeipa_server` contains a host. - `freeipa_client_ip`: Optional, IP address of FreeIPA client. Default is `ansible_host`. 
From 8e511e62a14a5a2afcd2e2e69a4418acbac9e1aa Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Tue, 4 Jul 2023 12:11:08 +0000 Subject: [PATCH 50/99] fix freeipa client task inventory names --- ansible/roles/freeipa/tasks/client.yml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/ansible/roles/freeipa/tasks/client.yml b/ansible/roles/freeipa/tasks/client.yml index e49b43b36..7c3fdb222 100644 --- a/ansible/roles/freeipa/tasks/client.yml +++ b/ansible/roles/freeipa/tasks/client.yml @@ -7,7 +7,7 @@ - name: Retrieve persisted keytab from previous enrolement slurp: src: "{{ _freeipa_keytab_backup_path }}" - delegate_to: control + delegate_to: "{{ groups['control'] | first }}" register: _slurp_persisted_keytab failed_when: false @@ -73,11 +73,10 @@ owner: root group: root mode: ug=wrX,o= - delegate_to: control + delegate_to: "{{ groups['control'] | first }}" - name: Persist keytab copy: content: "{{ _slurp_current_keytab.content | b64decode }}" dest: "{{ _freeipa_keytab_backup_path }}" - delegate_to: control - # when: appliances_state_dir is defined + delegate_to: "{{ groups['control'] | first }}" From e3d8a6057ea3a0366447ce8ee5bf462ce02f1e53 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Tue, 4 Jul 2023 12:13:38 +0000 Subject: [PATCH 51/99] disable freeipa in CI, just provide examples --- environments/.stackhpc/inventory/extra_groups | 24 ++++++++++++------- .../inventory/group_vars/all/freeipa.yml | 8 ++++++- .../group_vars/resolv_conf/overrides.yml | 2 -- 3 files changed, 23 insertions(+), 11 deletions(-) delete mode 100644 environments/.stackhpc/inventory/group_vars/resolv_conf/overrides.yml diff --git a/environments/.stackhpc/inventory/extra_groups b/environments/.stackhpc/inventory/extra_groups index 3f024a1c5..aae0e1a6e 100644 --- a/environments/.stackhpc/inventory/extra_groups +++ b/environments/.stackhpc/inventory/extra_groups @@ -1,13 +1,21 @@ +[basic_users:children] +cluster + [rebuild:children] control compute 
-[freeipa_server:children] -control - -[freeipa_client:children] -login -compute +[etc_hosts:children] +cluster -[resolv_conf:children] -freeipa_client +# -- Example of enabling FreeIPA with in-appliance (dev-only) server +# [freeipa_server:children] +# control +# +# [freeipa_client:children] +# login +# compute +# +# [resolv_conf:children] +# freeipa_client +# --- end of FreeIPA example --- diff --git a/environments/.stackhpc/inventory/group_vars/all/freeipa.yml b/environments/.stackhpc/inventory/group_vars/all/freeipa.yml index 10806fac6..030c86db6 100644 --- a/environments/.stackhpc/inventory/group_vars/all/freeipa.yml +++ b/environments/.stackhpc/inventory/group_vars/all/freeipa.yml @@ -1,6 +1,12 @@ +# This file provides examples of using freeipa role variables. These are NOT functional in CI as freeipa_{server,client} groups are not defined. + +# NB: Users defined this way have expired passwords freeipa_users: - name: freeipatestuser # can't use rocky as $HOME isn't shared! password: "{{ vault_freeipatestuser_password }}" givenname: test sn: test - krbpasswordexpiration: "{{ lookup('pipe', 'date --date \"1 day\" +%Y%m%d%H%M%S') }}" # password will work for 24hrs + +# freeipa_client hosts must use a FreeIPA server for name resolution - requires hosts to be in group `resolv_conf`. 
+resolv_conf_nameservers: + - "{{ hostvars[groups['freeipa_server'].0].ansible_host }}" diff --git a/environments/.stackhpc/inventory/group_vars/resolv_conf/overrides.yml b/environments/.stackhpc/inventory/group_vars/resolv_conf/overrides.yml deleted file mode 100644 index 668cd9d07..000000000 --- a/environments/.stackhpc/inventory/group_vars/resolv_conf/overrides.yml +++ /dev/null @@ -1,2 +0,0 @@ -resolv_conf_nameservers: - - "{{ hostvars[groups['freeipa_server'].0].ansible_host }}" \ No newline at end of file From 52f062f32210045bea7d5c97cfe53976a05f989a Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Tue, 4 Jul 2023 14:30:39 +0000 Subject: [PATCH 52/99] move node_fqdn to terraform inventory --- ansible/roles/etc_hosts/README.md | 3 +-- ansible/roles/etc_hosts/defaults/main.yml | 2 +- environments/common/inventory/group_vars/all/defaults.yml | 1 - .../{{cookiecutter.environment}}/terraform/inventory.tpl | 6 +++--- 4 files changed, 5 insertions(+), 7 deletions(-) diff --git a/ansible/roles/etc_hosts/README.md b/ansible/roles/etc_hosts/README.md index 1372560af..0ad95681a 100644 --- a/ansible/roles/etc_hosts/README.md +++ b/ansible/roles/etc_hosts/README.md @@ -4,8 +4,7 @@ Hosts in the `etc_hosts` groups have `/etc/hosts` created with entries of the fo By default, an entry is created for each host in this group as follows: - The value of `ansible_host` is used as the IP_address. -- If `cluster_domain_suffix` is defined then `node_fqdn` is used as the canonical hostname and `inventory_hostname` as an alias. -- If `cluster_domain_suffix` is not defined then `inventory_hostname` is used as the canonical hostname. +- If `node_fqdn` is defined then that is used as the canonical hostname and `inventory_hostname` as an alias. Otherwise `inventory_hostname` is used as the canonical hostname. This may need overriding for multi-homed hosts or hosts with multiple aliases. 
# Variables diff --git a/ansible/roles/etc_hosts/defaults/main.yml b/ansible/roles/etc_hosts/defaults/main.yml index 48df2b9f7..c2ecbca0c 100644 --- a/ansible/roles/etc_hosts/defaults/main.yml +++ b/ansible/roles/etc_hosts/defaults/main.yml @@ -1,3 +1,3 @@ etc_hosts_template: hosts.j2 -etc_hosts_hostvars: "{{ ['ansible_host'] + (['node_fqdn'] if cluster_domain_suffix is defined else []) + ['inventory_hostname'] }}" +etc_hosts_hostvars: "{{ ['ansible_host'] + (['node_fqdn'] if node_fqdn is defined else []) + ['inventory_hostname'] }}" etc_hosts_extra_hosts: '' diff --git a/environments/common/inventory/group_vars/all/defaults.yml b/environments/common/inventory/group_vars/all/defaults.yml index 5292b8ac7..528dede1d 100644 --- a/environments/common/inventory/group_vars/all/defaults.yml +++ b/environments/common/inventory/group_vars/all/defaults.yml @@ -1,7 +1,6 @@ --- # Miscellaneous ansible_user: rocky -node_fqdn: "{{ ([inventory_hostname, openhpc_cluster_name] + ([cluster_domain_suffix] if cluster_domain_suffix is defined else [])) | join('.') }}" # works even without facts unlike ansible_fqdn appliances_repository_root: "{{ lookup('env', 'APPLIANCES_REPO_ROOT') }}" appliances_environment_root: "{{ lookup('env', 'APPLIANCES_ENVIRONMENT_ROOT') }}" #appliances_state_dir: # define an absolute path here to use for persistent state: NB: This is defined as /var/lib/state in inventory by the default Terraform diff --git a/environments/skeleton/{{cookiecutter.environment}}/terraform/inventory.tpl b/environments/skeleton/{{cookiecutter.environment}}/terraform/inventory.tpl index 7b4b406b5..11b2cfd45 100644 --- a/environments/skeleton/{{cookiecutter.environment}}/terraform/inventory.tpl +++ b/environments/skeleton/{{cookiecutter.environment}}/terraform/inventory.tpl @@ -4,7 +4,7 @@ cluster_domain_suffix=${cluster_domain_suffix} [control] %{ for control in control_instances ~} -${ control.name } ansible_host=${[for n in control.network: n.fixed_ip_v4 if n.access_network][0]} 
+${ control.name } ansible_host=${[for n in control.network: n.fixed_ip_v4 if n.access_network][0]} node_fqdn=${ control.name }.${cluster_name}.${cluster_domain_suffix} %{ endfor ~} [control:vars] @@ -13,12 +13,12 @@ appliances_state_dir=${state_dir} [login] %{ for login in login_instances ~} -${ login.name } ansible_host=${[for n in login.network: n.fixed_ip_v4 if n.access_network][0]} +${ login.name } ansible_host=${[for n in login.network: n.fixed_ip_v4 if n.access_network][0]} node_fqdn=${ login.name }.${cluster_name}.${cluster_domain_suffix} %{ endfor ~} [compute] %{ for compute in compute_instances ~} -${ compute.name } ansible_host=${[for n in compute.network: n.fixed_ip_v4 if n.access_network][0]} +${ compute.name } ansible_host=${[for n in compute.network: n.fixed_ip_v4 if n.access_network][0]} node_fqdn=${ compute.name }.${cluster_name}.${cluster_domain_suffix} %{ endfor ~} # Define groups for slurm parititions: From d6fe2643d56222de4134c5967d0d343ed69fd788 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Tue, 4 Jul 2023 14:31:28 +0000 Subject: [PATCH 53/99] document stackhpc env freeipa setup --- environments/.stackhpc/inventory/extra_groups | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/environments/.stackhpc/inventory/extra_groups b/environments/.stackhpc/inventory/extra_groups index aae0e1a6e..d1da6f4a8 100644 --- a/environments/.stackhpc/inventory/extra_groups +++ b/environments/.stackhpc/inventory/extra_groups @@ -8,7 +8,10 @@ compute [etc_hosts:children] cluster -# -- Example of enabling FreeIPA with in-appliance (dev-only) server +# -- Example of enabling FreeIPA with an in-appliance (dev-only) server +# NB: The etc_hosts and basic_users group definitions above should be commented out +# The freeipa_* hosts will pick up configuration from environments/.stackhpc/inventory/group_vars/all/freeipa.yml + # [freeipa_server:children] # control # From cca7389dc01f16996da74c30181169aee66c135f Mon Sep 17 00:00:00 2001 From: Steve 
Brasier Date: Fri, 19 May 2023 09:18:32 +0000 Subject: [PATCH 54/99] enable NFS RPC security service for freeipa clients --- ansible/roles/freeipa/tasks/client.yml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/ansible/roles/freeipa/tasks/client.yml b/ansible/roles/freeipa/tasks/client.yml index 7c3fdb222..1ff8f57b6 100644 --- a/ansible/roles/freeipa/tasks/client.yml +++ b/ansible/roles/freeipa/tasks/client.yml @@ -60,6 +60,15 @@ ipa_client_install_password.rc != 0 and 'IPA client is already configured' not in ipa_client_install_password.stderr +- name: Ensure NFS RPC security service is running + # This service is installed by nfs-utils, which attempts to start it. + # It has ConditionPathExists=/etc/krb5.keytab which fails if host is not enroled. + # This task avoids a reboot. + systemd: + name: rpc-gssd.service + state: started + enabled: true + - name: Retrieve current keytab slurp: src: /etc/krb5.keytab From b304a47a659d9f8baae1898be0c52f671c10e909 Mon Sep 17 00:00:00 2001 From: Thomas Berger Date: Wed, 20 Sep 2023 16:16:21 +1200 Subject: [PATCH 55/99] Update fatimage.yml --- ansible/fatimage.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/fatimage.yml b/ansible/fatimage.yml index 36b608499..7f06923d5 100644 --- a/ansible/fatimage.yml +++ b/ansible/fatimage.yml @@ -45,7 +45,7 @@ tasks_from: install.yml - name: Include distribution variables for osc.ood - include_vars: "{{ appliances_repository_root }}/ansible/roles/osc.ood/vars/Rocky.yml" + include_vars: "{{ appliances_repository_root }}/ansible/roles/osc.ood/vars/Rocky/8.yml" # FUTURE: install-apps.yml - this is git clones # - import_playbook: portal.yml From c5e27d400383b1da8311db1bb1f4501d4baec31e Mon Sep 17 00:00:00 2001 From: Thomas Berger Date: Wed, 20 Sep 2023 16:18:29 +1200 Subject: [PATCH 56/99] oodv3 changes --- ansible/roles/openondemand/tasks/main.yml | 2 +- environments/common/inventory/group_vars/all/openondemand.yml | 2 +- requirements.yml | 2 +- 3 files
changed, 3 insertions(+), 3 deletions(-) diff --git a/ansible/roles/openondemand/tasks/main.yml b/ansible/roles/openondemand/tasks/main.yml index f1c5720ae..a74f0e24e 100644 --- a/ansible/roles/openondemand/tasks/main.yml +++ b/ansible/roles/openondemand/tasks/main.yml @@ -10,7 +10,7 @@ - include_role: name: osc.ood tasks_from: install-package.yml - vars_from: Rocky.yml + vars_from: Rocky/8.yml public: yes # Expose the vars from this role to the rest of the play # can't set vars: from a dict hence the workaround above diff --git a/environments/common/inventory/group_vars/all/openondemand.yml b/environments/common/inventory/group_vars/all/openondemand.yml index c29b6a559..c63191095 100644 --- a/environments/common/inventory/group_vars/all/openondemand.yml +++ b/environments/common/inventory/group_vars/all/openondemand.yml @@ -13,7 +13,7 @@ # or include regex special characters. openondemand_host_regex: "{{ (groups['compute'] + groups['grafana']) | to_ood_regex }}" -ondemand_package: ondemand-2.0.29 +ondemand_package: ondemand-3.0.1 openondemand_dashboard_links: # TODO: should really only be deployed if grafana is deployed and proxying configured - name: Grafana diff --git a/requirements.yml b/requirements.yml index 482d477f2..93be4c9f9 100644 --- a/requirements.yml +++ b/requirements.yml @@ -19,7 +19,7 @@ roles: # No versions available - src: https://github.com/OSC/ood-ansible.git name: osc.ood - version: v2.0.8 + version: v3.0.6 collections: - name: containers.podman From 06baa592db7b553707e2f6f1dfaf737a8362b5c8 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Tue, 26 Sep 2023 14:39:31 +0000 Subject: [PATCH 57/99] bump image --- environments/.stackhpc/terraform/main.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environments/.stackhpc/terraform/main.tf b/environments/.stackhpc/terraform/main.tf index d3771fafb..d6806baa3 100644 --- a/environments/.stackhpc/terraform/main.tf +++ b/environments/.stackhpc/terraform/main.tf @@ -13,7 +13,7 @@ 
variable "cluster_name" { variable "cluster_image" { description = "single image for all cluster nodes - a convenience for CI" type = string - default = "openhpc-230922-0940-434e190f" # https://github.com/stackhpc/ansible-slurm-appliance/pull/313 + default = "openhpc-230926-1343-e3d3e307" # https://github.com/stackhpc/ansible-slurm-appliance/pull/314 # default = "Rocky-8-GenericCloud-Base-8.7-20221130.0.x86_64.qcow2" # default = "Rocky-8-GenericCloud-8.6.20220702.0.x86_64.qcow2" } From 2a832f5e179b21c3e45e8609551e304bcef252b8 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Wed, 4 Oct 2023 14:38:50 +0000 Subject: [PATCH 58/99] make dnf module install of nvidia-driver idempotent --- ansible/roles/cuda/README.md | 2 ++ ansible/roles/cuda/tasks/main.yml | 34 +++++++++++++++++++++++-------- 2 files changed, 28 insertions(+), 8 deletions(-) diff --git a/ansible/roles/cuda/README.md b/ansible/roles/cuda/README.md index 7b6f2ea25..8c36907ee 100644 --- a/ansible/roles/cuda/README.md +++ b/ansible/roles/cuda/README.md @@ -2,6 +2,8 @@ Install NVIDIA CUDA. The CUDA binaries are added to the PATH for all users, and the [NVIDIA persistence daemon](https://docs.nvidia.com/deploy/driver-persistence/index.html#persistence-daemon) is enabled. +To avoid unwanted package updates which break functionality, on first use this role enables the dkms-flavour `nvidia-driver` DNF module stream which has the current highest version number. The `latest-dkms` stream is not enabled, and subsequent runs of the role will *not* switch which stream is enabled even if later version streams become available. If an upgrade of the `nvidia-driver` module is required, the currently-enabled stream and all packages should be manually removed. + ## Prerequisites Requires OFED to be installed to provide required kernel-* packages. 
diff --git a/ansible/roles/cuda/tasks/main.yml b/ansible/roles/cuda/tasks/main.yml index 0fa70928d..b323cfc04 100644 --- a/ansible/roles/cuda/tasks/main.yml +++ b/ansible/roles/cuda/tasks/main.yml @@ -17,22 +17,40 @@ dest: "/etc/yum.repos.d/cuda-{{ cuda_distro }}.repo" url: "{{ cuda_repo }}" +- name: Check if nvidia driver module is enabled + shell: + cmd: dnf module list --enabled nvidia-driver + changed_when: false + failed_when: false + register: _cuda_driver_module_enabled + +- name: List nvidia driver dnf module stream versions + shell: + cmd: dnf module list nvidia-driver | grep -oP "\d+-dkms" | sort -V + # Output of interest from command is something like (some whitespace removed): + # "nvidia-driver 418-dkms default [d], fm, ks Nvidia driver for 418-dkms branch " + changed_when: false + register: _cuda_driver_module_streams + when: "'No matching Modules to list' in _cuda_driver_module_enabled.stderr" + - name: Enable nvidia driver module - ansible.builtin.command: dnf module enable -y nvidia-driver:latest-dkms - register: nvidiadriver_enable - changed_when: "'Nothing to do' not in nvidiadriver_enable.stdout" + ansible.builtin.command: "dnf module enable -y nvidia-driver:{{ _cuda_driver_module_streams.stdout_lines | last }}" + register: _cuda_driver_module_enable + when: "'No matching Modules to list' in _cuda_driver_module_enabled.stderr" + changed_when: "'Nothing to do' not in _cuda_driver_module_enable.stdout" -- name: Install nvidia driver module - ansible.builtin.command: dnf module install -y nvidia-driver:latest-dkms - register: nvidiadriver_install - changed_when: "'Nothing to do' not in nvidiadriver_install.stdout" +- name: Install nvidia drivers # TODO: make removal possible? 
+ ansible.builtin.command: dnf module install -y nvidia-driver + register: _cuda_driver_install + when: "'No matching Modules to list' in _cuda_driver_module_enabled.stderr" + changed_when: "'Nothing to do' not in _cuda_driver_install.stdout" - name: Install cuda packages ansible.builtin.dnf: name: "{{ cuda_packages }}" register: cuda_package_install -- name: Add latest cuda binaries to path +- name: Add cuda binaries to path lineinfile: path: /etc/profile.d/sh.local line: 'export PATH=$PATH:$(ls -1d /usr/local/cuda-* | sort -V | tail -1)/bin' From fc2de71774dcd36dac99355acf2bcd167420173e Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Fri, 6 Oct 2023 13:31:15 +0000 Subject: [PATCH 59/99] make it clear _cuda_version_tuple is a private var --- ansible/roles/cuda/defaults/main.yml | 4 ++-- ansible/roles/cuda/tasks/samples.yml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ansible/roles/cuda/defaults/main.yml b/ansible/roles/cuda/defaults/main.yml index 6b04d428b..897dd0c7f 100644 --- a/ansible/roles/cuda/defaults/main.yml +++ b/ansible/roles/cuda/defaults/main.yml @@ -3,8 +3,8 @@ cuda_repo: "https://developer.download.nvidia.com/compute/cuda/repos/{{ cuda_dis cuda_packages: - cuda - nvidia-gds -# cuda_version_tuple: # discovered from installed package e.g. ('12', '1', '0') -cuda_version_short: "{{ cuda_version_tuple[0] }}.{{ cuda_version_tuple[1] }}" +# _cuda_version_tuple: # discovered from installed package e.g. 
('12', '1', '0') +cuda_version_short: "{{ _cuda_version_tuple[0] }}.{{ _cuda_version_tuple[1] }}" cuda_samples_release_url: "https://github.com/NVIDIA/cuda-samples/archive/refs/tags/v{{ cuda_version_short }}.tar.gz" cuda_samples_path: "/home/{{ ansible_user }}/cuda_samples" cuda_samples_programs: diff --git a/ansible/roles/cuda/tasks/samples.yml b/ansible/roles/cuda/tasks/samples.yml index ebe00a68f..bf48c4aa4 100644 --- a/ansible/roles/cuda/tasks/samples.yml +++ b/ansible/roles/cuda/tasks/samples.yml @@ -5,7 +5,7 @@ - name: Set fact for discovered cuda version set_fact: - cuda_version_tuple: "{{ (_cuda_samples_version.content | b64decode | from_json).cuda.version | split('.') }}" # e.g. '12.1.0' + _cuda_version_tuple: "{{ (_cuda_samples_version.content | b64decode | from_json).cuda.version | split('.') }}" # e.g. '12.1.0' - name: Ensure cuda_samples_path exists file: From 1472bd691e3c10ec23c9245f22e090a942170451 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Fri, 6 Oct 2023 14:07:19 +0000 Subject: [PATCH 60/99] add cuda_driver_stream variable --- ansible/roles/cuda/README.md | 3 +-- ansible/roles/cuda/defaults/main.yml | 1 + 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ansible/roles/cuda/README.md b/ansible/roles/cuda/README.md index 8c36907ee..141e7b80d 100644 --- a/ansible/roles/cuda/README.md +++ b/ansible/roles/cuda/README.md @@ -2,8 +2,6 @@ Install NVIDIA CUDA. The CUDA binaries are added to the PATH for all users, and the [NVIDIA persistence daemon](https://docs.nvidia.com/deploy/driver-persistence/index.html#persistence-daemon) is enabled. -To avoid unwanted package updates which break functionality, on first use this role enables the dkms-flavour `nvidia-driver` DNF module stream which has the current highest version number. The `latest-dkms` stream is not enabled, and subsequent runs of the role will *not* switch which stream is enabled even if later version streams become available.
If an upgrade of the `nvidia-driver` module is required, the currently-enabled stream and all packages should be manually removed. - ## Prerequisites Requires OFED to be installed to provide required kernel-* packages. @@ -12,5 +10,6 @@ Requires OFED to be installed to provide required kernel-* packages. - `cuda_distro`: Optional. Default `rhel8`. - `cuda_repo`: Optional. Default `https://developer.download.nvidia.com/compute/cuda/repos/{{ cuda_distro }}/x86_64/cuda-{{ cuda_distro }}.repo` +- `cuda_driver_stream`: Optional. The default value `default` will, on first use of this role, enable the dkms-flavour `nvidia-driver` DNF module stream with the current highest version number. The `latest-dkms` stream is not enabled, and subsequent runs of the role will *not* change the enabled stream, even if a later version has become available. Changing this value once an `nvidia-driver` stream has been enabled raises an error. If an upgrade of the `nvidia-driver` module is required, the currently-enabled stream and all packages should be manually removed. - `cuda_packages`: Optional. Default: `['cuda', 'nvidia-gds']`. - `cuda_persistenced_state`: Optional. State of systemd `nvidia-persistenced` service. Values as [ansible.builtin.systemd:state](https://docs.ansible.com/ansible/latest/collections/ansible/builtin/systemd_module.html#parameter-state). Default `started`. 
diff --git a/ansible/roles/cuda/defaults/main.yml b/ansible/roles/cuda/defaults/main.yml index 897dd0c7f..6b377a10b 100644 --- a/ansible/roles/cuda/defaults/main.yml +++ b/ansible/roles/cuda/defaults/main.yml @@ -1,5 +1,6 @@ cuda_distro: rhel8 cuda_repo: "https://developer.download.nvidia.com/compute/cuda/repos/{{ cuda_distro }}/x86_64/cuda-{{ cuda_distro }}.repo" +cuda_driver_stream: default cuda_packages: - cuda - nvidia-gds From 888165711426d0c751aa99a915797401c31424ec Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Thu, 12 Oct 2023 09:10:53 +0000 Subject: [PATCH 61/99] use setup-env script to update galaxy installs --- dev/setup-env.sh | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/dev/setup-env.sh b/dev/setup-env.sh index 41c0c29af..f24f6226a 100755 --- a/dev/setup-env.sh +++ b/dev/setup-env.sh @@ -1,10 +1,12 @@ #!/bin/bash -/usr/bin/python3.8 -m venv venv # use `sudo yum install python38` on Rocky Linux 8 to install this +if [[ ! -d "venv" ]]; then + /usr/bin/python3.8 -m venv venv # use `sudo yum install python38` on Rocky Linux 8 to install this +fi . venv/bin/activate pip install -U pip pip install -r requirements.txt ansible --version -# Install ansible dependencies ... -ansible-galaxy role install -r requirements.yml -p ansible/roles -ansible-galaxy collection install -r requirements.yml -p ansible/collections +# Install or update ansible dependencies ... 
+ansible-galaxy role install -fr requirements.yml -p ansible/roles +ansible-galaxy collection install -fr requirements.yml -p ansible/collections From 47208bb470bb08c0f4e6c016bde77967359f38a8 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Thu, 12 Oct 2023 09:15:18 +0000 Subject: [PATCH 62/99] add no_log override --- ansible/roles/mysql/tasks/configure.yml | 2 +- environments/.stackhpc/hooks/pre.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ansible/roles/mysql/tasks/configure.yml b/ansible/roles/mysql/tasks/configure.yml index e29a536b5..d4dd4cd54 100644 --- a/ansible/roles/mysql/tasks/configure.yml +++ b/ansible/roles/mysql/tasks/configure.yml @@ -21,7 +21,7 @@ community.mysql.mysql_info: login_user: root login_password: "{{ mysql_root_password }}" - # no_log: true # TODO: FIXME + no_log: "{{ no_log | default(true) }}" register: _mysql_info until: "'version' in _mysql_info" retries: 90 diff --git a/environments/.stackhpc/hooks/pre.yml b/environments/.stackhpc/hooks/pre.yml index d26045f2d..65dfad72d 100644 --- a/environments/.stackhpc/hooks/pre.yml +++ b/environments/.stackhpc/hooks/pre.yml @@ -10,7 +10,7 @@ mode: 0400 owner: root group: root - no_log: true + no_log: "{{ no_log | default(true) }}" loop: - "{{ lookup('env', 'APPLIANCES_ENVIRONMENT_ROOT') }}/inventory/hosts" - "{{ lookup('env', 'APPLIANCES_ENVIRONMENT_ROOT') }}/inventory/group_vars/all/secrets.yml" From 7f8c39cae679f24254324eed665dea273fcbe2be Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Thu, 12 Oct 2023 09:16:01 +0000 Subject: [PATCH 63/99] add delete-cluster script --- dev/delete-cluster.py | 46 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100755 dev/delete-cluster.py diff --git a/dev/delete-cluster.py b/dev/delete-cluster.py new file mode 100755 index 000000000..861396efd --- /dev/null +++ b/dev/delete-cluster.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python + +""" +Delete infrastructure for a cluster without using 
Terraform. Useful for CI clusters. + +Usage: + delete-cluster.py PREFIX + +Where PREFIX is the string at the start of the resource's names. +It will list matching resources and prompt to confirm deletion. +""" + +import sys, json, subprocess, pprint + + +CLUSTER_RESOURCES = ['server', 'port', 'volume'] + +def delete_cluster(cluster_prefix): + to_delete = {} + for resource_type in CLUSTER_RESOURCES: + to_delete[resource_type] = [] + resource_list = subprocess.run(f'openstack {resource_type} list --format json', stdout=subprocess.PIPE, shell=True) + resources = json.loads(resource_list.stdout) + for item in resources: + try: + if item['Name'] is not None and item['Name'].startswith(cluster_prefix): + print(resource_type, item['Name'], item['ID']) + to_delete[resource_type].append(item) + except: + print(resource_type, item) + raise + if input('Delete these (y/n)?:') == 'y': + for resource_type in CLUSTER_RESOURCES: + items = [v['ID'] for v in to_delete[resource_type]] + if items: + # delete all resources of each type in a single call for speed: + subprocess.run(f"openstack {resource_type} delete {' '.join(items)}", stdout=subprocess.PIPE, shell=True) + print(f'Deleted {len(items)} {resource_type}s') + else: + print('Cancelled - no resources deleted') + +if __name__ == '__main__': + if len(sys.argv) != 2: + print('ERROR: Incorrect argument(s).\n' + __doc__) + exit(1) + delete_cluster(sys.argv[1]) From 0260f4d6b133c5656f7b55d89d1b47a22df80935 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Thu, 12 Oct 2023 09:52:27 +0000 Subject: [PATCH 64/99] update inventory retrieval for multiple CI environents --- ansible/ci/retrieve_inventory.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ansible/ci/retrieve_inventory.yml b/ansible/ci/retrieve_inventory.yml index 36de71d78..fa3f32211 100644 --- a/ansible/ci/retrieve_inventory.yml +++ b/ansible/ci/retrieve_inventory.yml @@ -7,7 +7,8 @@ gather_facts: no vars: cluster_prefix: "{{ undef(hint='cluster_prefix 
must be defined') }}" # e.g. ci4005969475 - cluster_network: WCDC-iLab-60 + ci_vars_file: "{{ appliances_environment_root + '/terraform/' + lookup('env', 'CI_CLOUD') }}.tfvars" + cluster_network: "{{ lookup('ansible.builtin.ini', 'cluster_net', file=ci_vars_file, type='properties') | trim('\"') }}" tasks: - name: Get control host IP set_fact: From 46ef0c2b5f6e3d6a061aa58fd5fd1002a14d6993 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Thu, 12 Oct 2023 13:17:13 +0000 Subject: [PATCH 65/99] fix basic_users not working in CI --- .../inventory/group_vars/all/{test_user.yml => basic_users.yml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename environments/.stackhpc/inventory/group_vars/all/{test_user.yml => basic_users.yml} (100%) diff --git a/environments/.stackhpc/inventory/group_vars/all/test_user.yml b/environments/.stackhpc/inventory/group_vars/all/basic_users.yml similarity index 100% rename from environments/.stackhpc/inventory/group_vars/all/test_user.yml rename to environments/.stackhpc/inventory/group_vars/all/basic_users.yml From c2bdb57583d4e38458d1cd84ed928e679beed9cd Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Thu, 12 Oct 2023 16:01:17 +0000 Subject: [PATCH 66/99] install freeipa client packages in fatimage build --- ansible/fatimage.yml | 9 +++++++-- ansible/iam.yml | 6 +++++- ansible/roles/freeipa/tasks/client-install.yml | 4 ++++ ansible/roles/freeipa/tasks/{client.yml => enrole.yml} | 4 ---- 4 files changed, 16 insertions(+), 7 deletions(-) create mode 100644 ansible/roles/freeipa/tasks/client-install.yml rename ansible/roles/freeipa/tasks/{client.yml => enrole.yml} (97%) diff --git a/ansible/fatimage.yml b/ansible/fatimage.yml index 7f06923d5..7017f87b4 100644 --- a/ansible/fatimage.yml +++ b/ansible/fatimage.yml @@ -27,6 +27,13 @@ state: stopped enabled: false + # - import_playbook: iam.yml + - name: Install freeipa client + import_role: + name: freeipa + tasks_from: client-install.yml + when: "'freeipa_client' in group_names" + # 
- import_playbook: filesystems.yml - name: nfs dnf: @@ -142,8 +149,6 @@ name: cloudalchemy.grafana tasks_from: install.yml - # - import_playbook: iam.yml - nothing to do - - name: Run post.yml hook vars: appliances_environment_root: "{{ lookup('env', 'APPLIANCES_ENVIRONMENT_ROOT') }}" diff --git a/ansible/iam.yml b/ansible/iam.yml index 655fc3f73..988e96093 100644 --- a/ansible/iam.yml +++ b/ansible/iam.yml @@ -19,10 +19,14 @@ gather_facts: yes become: yes tasks: + - name: Install freeipa client + import_role: + name: freeipa + tasks_from: client-install.yml - name: Enrole freeipa clients import_role: name: freeipa - tasks_from: client.yml + tasks_from: enrole.yml - hosts: freeipa_server tags: diff --git a/ansible/roles/freeipa/tasks/client-install.yml b/ansible/roles/freeipa/tasks/client-install.yml new file mode 100644 index 000000000..a164cd26e --- /dev/null +++ b/ansible/roles/freeipa/tasks/client-install.yml @@ -0,0 +1,4 @@ + +- name: Install FreeIPA client package + dnf: + name: ipa-client diff --git a/ansible/roles/freeipa/tasks/client.yml b/ansible/roles/freeipa/tasks/enrole.yml similarity index 97% rename from ansible/roles/freeipa/tasks/client.yml rename to ansible/roles/freeipa/tasks/enrole.yml index 1ff8f57b6..63ca44ae1 100644 --- a/ansible/roles/freeipa/tasks/client.yml +++ b/ansible/roles/freeipa/tasks/enrole.yml @@ -1,9 +1,5 @@ # based on https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/8/html/installing_identity_management/assembly_installing-an-idm-client_installing-identity-management -- name: Install FreeIPA client package - dnf: - name: ipa-client - - name: Retrieve persisted keytab from previous enrolement slurp: src: "{{ _freeipa_keytab_backup_path }}" From 63522fed8b672d6f508d9f87ddaa42f9fa538349 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Tue, 17 Oct 2023 15:58:11 +0000 Subject: [PATCH 67/99] use local container image registry for CI to avoid docker.io ratelimits --- environments/.stackhpc/hooks/pre.yml | 27 
+++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/environments/.stackhpc/hooks/pre.yml b/environments/.stackhpc/hooks/pre.yml index d26045f2d..3e13edcaa 100644 --- a/environments/.stackhpc/hooks/pre.yml +++ b/environments/.stackhpc/hooks/pre.yml @@ -15,3 +15,30 @@ - "{{ lookup('env', 'APPLIANCES_ENVIRONMENT_ROOT') }}/inventory/hosts" - "{{ lookup('env', 'APPLIANCES_ENVIRONMENT_ROOT') }}/inventory/group_vars/all/secrets.yml" - "{{ lookup('env', 'APPLIANCES_ENVIRONMENT_ROOT') }}/inventory/group_vars/all/test_user.yml" + +- hosts: all:!builder + become: yes + gather_facts: false + name: Use local container image registry to avoid docker.io ratelimits + tasks: + - name: Copy registries.conf + copy: + remote_src: true + src: /etc/containers/registries.conf + dest: /etc/containers/registries.conf.orig + - name: Define local registry + tags: podman + blockinfile: + path: /etc/containers/registries.conf + block: | + [[registry]] + prefix = "192.168.3.95:5000" + location = "192.168.3.95:5000" + insecure = true + - name: Use local registry for unqualified searches + tags: podman + community.general.ini_file: # actually TOML but this is OK here + path: /etc/containers/registries.conf + section: null + option: unqualified-search-registries + value: ['192.168.3.95:5000', 'registry.access.redhat.com', 'registry.redhat.io', 'docker.io'] From 58ed1fdb9f29bcb70056d237f4e7ebf8e7f9f318 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Wed, 18 Oct 2023 13:20:39 +0000 Subject: [PATCH 68/99] revise to use Arcus staging pull-through cache --- environments/.stackhpc/hooks/pre.yml | 27 +++++-------------- .../inventory/group_vars/all/podman.yml | 2 ++ 2 files changed, 9 insertions(+), 20 deletions(-) create mode 100644 environments/.stackhpc/inventory/group_vars/all/podman.yml diff --git a/environments/.stackhpc/hooks/pre.yml b/environments/.stackhpc/hooks/pre.yml index 3e13edcaa..8693cee25 100644 --- a/environments/.stackhpc/hooks/pre.yml +++ 
b/environments/.stackhpc/hooks/pre.yml @@ -19,26 +19,13 @@ - hosts: all:!builder become: yes gather_facts: false - name: Use local container image registry to avoid docker.io ratelimits tasks: - - name: Copy registries.conf + - name: Configure Azimuth staging pullthrough container image registry to avoid docker.io ratelimits copy: - remote_src: true - src: /etc/containers/registries.conf - dest: /etc/containers/registries.conf.orig - - name: Define local registry - tags: podman - blockinfile: - path: /etc/containers/registries.conf - block: | + dest: /etc/containers/registries.conf.d/003-arcus-unqualfied-overrides.conf + content: | + unqualified-search-registries = ['{{ podman_registry_address }}', 'registry.access.redhat.com', 'registry.redhat.io', 'docker.io'] + [[registry]] - prefix = "192.168.3.95:5000" - location = "192.168.3.95:5000" - insecure = true - - name: Use local registry for unqualified searches - tags: podman - community.general.ini_file: # actually TOML but this is OK here - path: /etc/containers/registries.conf - section: null - option: unqualified-search-registries - value: ['192.168.3.95:5000', 'registry.access.redhat.com', 'registry.redhat.io', 'docker.io'] + prefix = "{{ podman_registry_address }}" + location = "{{ podman_registry_address }}" diff --git a/environments/.stackhpc/inventory/group_vars/all/podman.yml b/environments/.stackhpc/inventory/group_vars/all/podman.yml new file mode 100644 index 000000000..e9c921ba5 --- /dev/null +++ b/environments/.stackhpc/inventory/group_vars/all/podman.yml @@ -0,0 +1,2 @@ +arcus_podman_registry_address: registry.apps.128-232-222-81.sslip.io/v2/dockerhub-public +podman_registry_address: "{{ arcus_podman_registry_address if ci_cloud == 'ARCUS' else '' }}" From 888c2aca5d775aa3826e06219a81b702cbbbf74e Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Wed, 18 Oct 2023 13:55:36 +0000 Subject: [PATCH 69/99] use deployhost container registry again --- environments/.stackhpc/hooks/pre.yml | 6 ++++-- 
environments/.stackhpc/inventory/group_vars/all/podman.yml | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/environments/.stackhpc/hooks/pre.yml b/environments/.stackhpc/hooks/pre.yml index 8693cee25..34f4fe9af 100644 --- a/environments/.stackhpc/hooks/pre.yml +++ b/environments/.stackhpc/hooks/pre.yml @@ -19,13 +19,15 @@ - hosts: all:!builder become: yes gather_facts: false + tags: podman tasks: - - name: Configure Azimuth staging pullthrough container image registry to avoid docker.io ratelimits + - name: Configure container image registry for unqualified searches to avoid docker.io ratelimits copy: dest: /etc/containers/registries.conf.d/003-arcus-unqualfied-overrides.conf content: | - unqualified-search-registries = ['{{ podman_registry_address }}', 'registry.access.redhat.com', 'registry.redhat.io', 'docker.io'] + unqualified-search-registries = ['{{ podman_registry_address | split('/') | first }}', 'registry.access.redhat.com', 'registry.redhat.io', 'docker.io'] [[registry]] prefix = "{{ podman_registry_address }}" location = "{{ podman_registry_address }}" + insecure = true diff --git a/environments/.stackhpc/inventory/group_vars/all/podman.yml b/environments/.stackhpc/inventory/group_vars/all/podman.yml index e9c921ba5..b9d4109ed 100644 --- a/environments/.stackhpc/inventory/group_vars/all/podman.yml +++ b/environments/.stackhpc/inventory/group_vars/all/podman.yml @@ -1,2 +1,2 @@ -arcus_podman_registry_address: registry.apps.128-232-222-81.sslip.io/v2/dockerhub-public +arcus_podman_registry_address: 192.168.3.95:5000 podman_registry_address: "{{ arcus_podman_registry_address if ci_cloud == 'ARCUS' else '' }}" From 18342ef64977040ec3b941e17245caaa2a9a627b Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Thu, 19 Oct 2023 10:02:04 +0000 Subject: [PATCH 70/99] use podman_registry_address only on ARCUS --- environments/.stackhpc/hooks/pre.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/environments/.stackhpc/hooks/pre.yml 
b/environments/.stackhpc/hooks/pre.yml index 34f4fe9af..8f97dbf88 100644 --- a/environments/.stackhpc/hooks/pre.yml +++ b/environments/.stackhpc/hooks/pre.yml @@ -31,3 +31,4 @@ prefix = "{{ podman_registry_address }}" location = "{{ podman_registry_address }}" insecure = true + when: "ci_cloud == 'ARCUS'" From cb80c3ff7e708276344dcda3878661559181922c Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Thu, 19 Oct 2023 12:58:32 +0000 Subject: [PATCH 71/99] add role cve-2023-41914.yml --- ansible/.gitignore | 2 + ansible/adhoc/cve-2023-41914.yml | 6 +++ ansible/roles/cve-2023-41914/README.md | 32 +++++++++++++++ .../roles/cve-2023-41914/defaults/main.yml | 23 +++++++++++ .../cve-2023-41914/tasks/install-rpms.yml | 40 +++++++++++++++++++ ansible/roles/cve-2023-41914/tasks/main.yml | 4 ++ .../cve-2023-41914/tasks/post-upgrade.yml | 19 +++++++++ .../cve-2023-41914/tasks/pre-upgrade.yml | 34 ++++++++++++++++ .../roles/cve-2023-41914/tasks/shutdown.yml | 1 + .../roles/cve-2023-41914/tasks/validate.yml | 25 ++++++++++++ 10 files changed, 186 insertions(+) create mode 100644 ansible/adhoc/cve-2023-41914.yml create mode 100644 ansible/roles/cve-2023-41914/README.md create mode 100644 ansible/roles/cve-2023-41914/defaults/main.yml create mode 100644 ansible/roles/cve-2023-41914/tasks/install-rpms.yml create mode 100644 ansible/roles/cve-2023-41914/tasks/main.yml create mode 100644 ansible/roles/cve-2023-41914/tasks/post-upgrade.yml create mode 100644 ansible/roles/cve-2023-41914/tasks/pre-upgrade.yml create mode 100644 ansible/roles/cve-2023-41914/tasks/shutdown.yml create mode 100644 ansible/roles/cve-2023-41914/tasks/validate.yml diff --git a/ansible/.gitignore b/ansible/.gitignore index 8ad2ac3ab..6883c6ae5 100644 --- a/ansible/.gitignore +++ b/ansible/.gitignore @@ -42,3 +42,5 @@ roles/* !roles/proxy/** !roles/resolv_conf/ !roles/resolv_conf/** +!roles/cve-2023-41914 +!roles/cve-2023-41914/** diff --git a/ansible/adhoc/cve-2023-41914.yml b/ansible/adhoc/cve-2023-41914.yml 
new file mode 100644 index 000000000..e4b907d44 --- /dev/null +++ b/ansible/adhoc/cve-2023-41914.yml @@ -0,0 +1,6 @@ +- hosts: openhpc + gather_facts: no + become: yes + tasks: + - import_role: + name: cve-2023-41914 diff --git a/ansible/roles/cve-2023-41914/README.md b/ansible/roles/cve-2023-41914/README.md new file mode 100644 index 000000000..3ed1c0dae --- /dev/null +++ b/ansible/roles/cve-2023-41914/README.md @@ -0,0 +1,32 @@ +# cve-2023-41914 + +This role fixes [Slurm CVE-2023-41914](https://lists.schedmd.com/pipermail/slurm-announce/2023/000100.html): + +> A number of race conditions have been identified within the slurmd/slurmstepd processes that can lead to the user taking ownership of an arbitrary file on the system. A related issue can lead to the user overwriting an arbitrary file on the compute node (although with data that is not directly under their control). A related issue can also lead to the user deleting all files and sub-directories of an arbitrary target directory on the compute node. + +**NB:** It is only suitable for use on systems installed from OpenHPC v2.6.1 (Slurm v22.05). + +At the time of writing, new OpenHPC packages have been built but are not available from the respositories (reference), hence `dnf update ...` is not available. + +This role can be run in two ways: + +1. To remediate an existing system, run `tasks/main.yml`, e.g. using the playbook `ansible/adhoc/cve-2023-41914.yml`. This will: +- Stop all Slurm services +- Backup the slurmdbd mysql database to the volume-backed directory `/var/lib/state/mysql-backups/` on the control node (by default). +- Uninstall the affected packages and install updated rpms from the OpenHPC build system. +- Restart Slurm services. + + **NB**: This playbook will ALWAYS stop and restart Slurm, even if no updates are actually required. + +2. To remediate images during build (i.e no Slurm services are running, no slurm database exists), run `tasks/install-rpms.yml`, e.g. 
using the following in an environment pre-hook: + +```yaml +- hosts: builder + gather_facts: no + become: yes + tasks: + - name: Fix cve-2023-41914 + import_role: + name: cve-2023-41914 + tasks_from: install-rpms.yml +``` diff --git a/ansible/roles/cve-2023-41914/defaults/main.yml b/ansible/roles/cve-2023-41914/defaults/main.yml new file mode 100644 index 000000000..d8fbd05bd --- /dev/null +++ b/ansible/roles/cve-2023-41914/defaults/main.yml @@ -0,0 +1,23 @@ + +# _cve_2023_41814_installed_slurm: [] +cve_2023_41914_mysql_backup_path: "{{ mysql_datadir }}-backups/{{ lookup('pipe', 'date --iso-8601=seconds') }}.sql" + +# slurm-contribs-ohpc-22.05.10-2.1.ohpc.2.6.2.x86_64.rpm +cve_2023_41914_rpm_url: http://obs.openhpc.community:82/OpenHPC:/2.6.2:/Factory/EL_8/x86_64 +cve_2023_41914_rpms: # see cve_2023_41914_rpm_url + slurm-ohpc: ['22.05.10', 2.1.ohpc.2.6.2] # has to be first as dependency + slurm-contribs-ohpc: ['22.05.10', '2.1.ohpc.2.6.2'] + slurm-devel-ohpc: ['22.05.10', '2.1.ohpc.2.6.2'] + slurm-example-configs-ohpc: ['22.05.10', '2.1.ohpc.2.6.2'] + slurm-libpmi-ohpc: ['22.05.10', '2.1.ohpc.2.6.2'] + slurm-ohpc-slurmrestd: ['22.05.10', '2.1.ohpc.2.6.2'] + slurm-openlava-ohpc: ['22.05.10', '2.1.ohpc.2.6.2'] + slurm-pam_slurm-ohpc: ['22.05.10', '2.1.ohpc.2.6.2'] + slurm-perlapi-ohpc: ['22.05.10', '2.1.ohpc.2.6.2'] + slurm-slurmctld-ohpc: ['22.05.10', '2.1.ohpc.2.6.2'] + slurm-slurmd-ohpc: ['22.05.10', '2.1.ohpc.2.6.2'] + slurm-slurmdbd-ohpc: ['22.05.10', '2.1.ohpc.2.6.2'] + slurm-sview-ohpc: ['22.05.10', '2.1.ohpc.2.6.2'] + slurm-torque-ohpc: ['22.05.10', '2.1.ohpc.2.6.2'] +_cve_2023_41814_updates: [] +cve_2023_41914_pkglist_path: "{{ appliances_environment_root }}/{{ inventory_hostname }}-cve_2023_41814_updates" diff --git a/ansible/roles/cve-2023-41914/tasks/install-rpms.yml b/ansible/roles/cve-2023-41914/tasks/install-rpms.yml new file mode 100644 index 000000000..4815ea0fc --- /dev/null +++ b/ansible/roles/cve-2023-41914/tasks/install-rpms.yml @@ -0,0 +1,40 @@ 
+- name: Identify packages to update + set_fact: + _cve_2023_41814_updates: "{{ _cve_2023_41814_updates + [item.key] }}" + loop: "{{ cve_2023_41914_rpms | dict2items }}" + loop_control: + label: "{{ item.key }}" + when: + - item.key in ansible_facts.packages + - item.value[0] is version(ansible_facts.packages[item.key][0].version, '>') + +- name: Write packages to be modified to a file + # allows recovery from failures in subsequent package deletion/rpm install + copy: + dest: "{{ cve_2023_41914_pkglist_path }}" + content: "{{ _cve_2023_41814_updates | to_nice_yaml }}" + when: _cve_2023_41814_updates | length > 0 + delegate_to: localhost + +- name: Read packages to modify + set_fact: + _cve_2023_41814_updates: "{{ lookup('file', cve_2023_41914_pkglist_path) | from_yaml }}" + +- name: Identify architecture + setup: + gather_subset: architecture + +- name: Remove installed packages + dnf: + name: "{{ _cve_2023_41814_updates }}" + state: absent + +- name: Install rpms + dnf: + name: "{{ cve_2023_41914_rpm_url }}/{{ item }}-{{ cve_2023_41914_rpms[item] | join('-') }}.{{ ansible_architecture }}.rpm" + loop: "{{ _cve_2023_41814_updates }}" + register: _cve_2023_41814_rpm_installs + +- name: Reload systemd units + command: systemctl daemon-reload + when: _cve_2023_41814_rpm_installs.changed diff --git a/ansible/roles/cve-2023-41914/tasks/main.yml b/ansible/roles/cve-2023-41914/tasks/main.yml new file mode 100644 index 000000000..83053baab --- /dev/null +++ b/ansible/roles/cve-2023-41914/tasks/main.yml @@ -0,0 +1,4 @@ +- include_tasks: validate.yml +- include_tasks: pre-upgrade.yml +- include_tasks: install-rpms.yml +- include_tasks: post-upgrade.yml diff --git a/ansible/roles/cve-2023-41914/tasks/post-upgrade.yml b/ansible/roles/cve-2023-41914/tasks/post-upgrade.yml new file mode 100644 index 000000000..d9540faa0 --- /dev/null +++ b/ansible/roles/cve-2023-41914/tasks/post-upgrade.yml @@ -0,0 +1,19 @@ +- name: Start slurmdbd + systemd: + name: slurmdbd + state: started + # 
NB: this approach is only suitable for minor version upgrades + # major ones may timeout on service start due to db upgrades + when: openhpc_enable.database | default('false') | bool + +- name: Start slurmctld + systemd: + name: slurmctld + state: started + when: openhpc_enable.control | default('false') | bool + +- name: Start slurmd + systemd: + name: slurmd + state: started + when: openhpc_enable.batch | default('false') | bool or 'login' in group_names diff --git a/ansible/roles/cve-2023-41914/tasks/pre-upgrade.yml b/ansible/roles/cve-2023-41914/tasks/pre-upgrade.yml new file mode 100644 index 000000000..caf3f6f53 --- /dev/null +++ b/ansible/roles/cve-2023-41914/tasks/pre-upgrade.yml @@ -0,0 +1,34 @@ +- name: Shut down slurm + systemd: + name: "{{ item }}" + state: stopped + register: _stop + failed_when: "'msg' in _stop and 'Could not find the requested service' not in _stop.msg" + # ignore_errors: true + loop: + - slurmd + - slurmctld + - slurmdbd + +- name: Ensure backup directory exists + file: + path: "{{ cve_2023_41914_mysql_backup_path | dirname }}" + state: directory + owner: root + group: root + when: openhpc_enable.control | default(false) | bool + +- name: Ensure mysqldump tool installed + dnf: + name: mysql + when: openhpc_enable.control | default(false) | bool + +- name: Backup database + community.mysql.mysql_db: + name: slurm_acct_db + state: dump + target: "{{ cve_2023_41914_mysql_backup_path }}" + login_user: root + login_password: "{{ mysql_root_password }}" + login_host: "{{ mysql_host }}" + when: openhpc_enable.control | default(false) | bool diff --git a/ansible/roles/cve-2023-41914/tasks/shutdown.yml b/ansible/roles/cve-2023-41914/tasks/shutdown.yml new file mode 100644 index 000000000..70d40527d --- /dev/null +++ b/ansible/roles/cve-2023-41914/tasks/shutdown.yml @@ -0,0 +1 @@ +- name: Shut down \ No newline at end of file diff --git a/ansible/roles/cve-2023-41914/tasks/validate.yml b/ansible/roles/cve-2023-41914/tasks/validate.yml new 
file mode 100644 index 000000000..41a04c24e --- /dev/null +++ b/ansible/roles/cve-2023-41914/tasks/validate.yml @@ -0,0 +1,25 @@ +- name: Get package facts + package_facts: + +- name: Set fact for installed Slurm packages + # this is a subset (same format) as ansible_facts.packages + set_fact: + _cve_2023_41814_installed_pkgs: "{{ ansible_facts.packages | dict2items | selectattr('key', 'match', 'slurm-') | items2dict }}" + +# - debug: +# var: _cve_2023_41814_installed_pkgs + +- name: Ensure only a single version of all slurm-* packages is installed + assert: + that: item.value | length == 1 + loop: "{{ _cve_2023_41814_installed_pkgs | dict2items }}" + +- name: Ensure major version of installed Slurm matches upgrade + assert: + that: _slurm_installed_major_ver == ['22', '05'] + fail_msg: "{{ item.key }} has major version {{ _slurm_installed_major_ver | join('.') }}, expecting 22.05" + loop: "{{ _cve_2023_41814_installed_pkgs | dict2items }}" + when: item.key.startswith('slurm') + vars: + _slurm_installed_major_ver: "{{ item.value[0].version.split('.')[0:2] }}" + From 8c65a4327cf40048422e213c0404cac629e08f0d Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Thu, 19 Oct 2023 14:30:04 +0000 Subject: [PATCH 72/99] fix cve-2023-41914 in fatimage build --- environments/.stackhpc/hooks/post.yml | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 environments/.stackhpc/hooks/post.yml diff --git a/environments/.stackhpc/hooks/post.yml b/environments/.stackhpc/hooks/post.yml new file mode 100644 index 000000000..48223a5a4 --- /dev/null +++ b/environments/.stackhpc/hooks/post.yml @@ -0,0 +1,8 @@ +- hosts: builder + gather_facts: no + become: yes + tasks: + - name: Fix cve-2023-41914 + import_role: + name: cve-2023-41914 + tasks_from: install-rpms.yml From c147d9ca64ae05005ff939cdb782690ac7820583 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Fri, 20 Oct 2023 09:49:47 +0000 Subject: [PATCH 73/99] report image name early in build --- ansible/fatimage.yml | 8 
++++++++ 1 file changed, 8 insertions(+) diff --git a/ansible/fatimage.yml b/ansible/fatimage.yml index 7f06923d5..ae392dde3 100644 --- a/ansible/fatimage.yml +++ b/ansible/fatimage.yml @@ -1,5 +1,13 @@ # Builder version of site.yml just installing binaries +- hosts: builder + become: no + gather_facts: no + tasks: + - name: Report hostname (= final image name) + command: hostname + register: _hostname + - name: Run pre.yml hook vars: appliances_environment_root: "{{ lookup('env', 'APPLIANCES_ENVIRONMENT_ROOT') }}" From 13c696535b5d752bdb4fac75fa532919a33a87e7 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Fri, 20 Oct 2023 09:59:04 +0000 Subject: [PATCH 74/99] make packer error behaviour controllable --- .github/workflows/fatimage.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/fatimage.yml b/.github/workflows/fatimage.yml index 8c5a5353d..3b7bcfdcb 100644 --- a/.github/workflows/fatimage.yml +++ b/.github/workflows/fatimage.yml @@ -47,7 +47,7 @@ jobs: . environments/.stackhpc/activate cd packer/ packer init . 
- PACKER_LOG=1 packer build -only openstack.openhpc -on-error=ask -var-file=$PKR_VAR_environment_root/${{ vars.CI_CLOUD }}.pkrvars.hcl openstack.pkr.hcl + PACKER_LOG=1 packer build -only openstack.openhpc -on-error=${{ vars.PACKER_ON_ERROR }} -var-file=$PKR_VAR_environment_root/${{ vars.CI_CLOUD }}.pkrvars.hcl openstack.pkr.hcl - name: Get created image name from manifest id: manifest From 7803e7442011241835538c0aa741186eec77d6db Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Fri, 20 Oct 2023 09:59:53 +0000 Subject: [PATCH 75/99] add ansible profiling in stackhpc environment --- environments/.stackhpc/ansible.cfg | 1 + 1 file changed, 1 insertion(+) diff --git a/environments/.stackhpc/ansible.cfg b/environments/.stackhpc/ansible.cfg index 2a12e06b6..139ffa033 100644 --- a/environments/.stackhpc/ansible.cfg +++ b/environments/.stackhpc/ansible.cfg @@ -2,6 +2,7 @@ any_errors_fatal = True stdout_callback = debug stderr_callback = debug +callbacks_enabled = ansible.posix.profile_tasks gathering = smart forks = 30 host_key_checking = False From c89fa839cd7a8d6eb617b6f09d6f4add939758c9 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Fri, 20 Oct 2023 10:30:42 +0000 Subject: [PATCH 76/99] fix stackhpc cve-2023-41914 build --- ansible/roles/cve-2023-41914/README.md | 8 ++++++-- environments/.stackhpc/hooks/post.yml | 6 +++++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/ansible/roles/cve-2023-41914/README.md b/ansible/roles/cve-2023-41914/README.md index 3ed1c0dae..9c2c282ec 100644 --- a/ansible/roles/cve-2023-41914/README.md +++ b/ansible/roles/cve-2023-41914/README.md @@ -18,14 +18,18 @@ This role can be run in two ways: **NB**: This playbook will ALWAYS stop and restart Slurm, even if no updates are actually required. -2. To remediate images during build (i.e no Slurm services are running, no slurm database exists), run `tasks/install-rpms.yml`, e.g. using the following in an environment pre-hook: +2. 
To remediate images during build (i.e no Slurm services are running, no slurm database exists), run `tasks/validate.yml` then `tasks/install-rpms.yml`, e.g. using the following in an environment pre-hook: ```yaml - hosts: builder gather_facts: no become: yes tasks: - - name: Fix cve-2023-41914 + - name: Check fixes for cve-2023-41914 can be applied + import_role: + name: cve-2023-41914 + tasks_from: validate.yml + - name: Apply fixes for cve-2023-41914 import_role: name: cve-2023-41914 tasks_from: install-rpms.yml diff --git a/environments/.stackhpc/hooks/post.yml b/environments/.stackhpc/hooks/post.yml index 48223a5a4..22cdc38f7 100644 --- a/environments/.stackhpc/hooks/post.yml +++ b/environments/.stackhpc/hooks/post.yml @@ -2,7 +2,11 @@ gather_facts: no become: yes tasks: - - name: Fix cve-2023-41914 + - name: Check fixes for cve-2023-41914 can be applied + import_role: + name: cve-2023-41914 + tasks_from: validate.yml + - name: Apply fixes for cve-2023-41914 import_role: name: cve-2023-41914 tasks_from: install-rpms.yml From 28bdf13feef5c0cce1ca7f78de95142b2ca0d596 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Fri, 20 Oct 2023 11:14:28 +0000 Subject: [PATCH 77/99] bump image --- environments/.stackhpc/terraform/main.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environments/.stackhpc/terraform/main.tf b/environments/.stackhpc/terraform/main.tf index d6806baa3..77550a38f 100644 --- a/environments/.stackhpc/terraform/main.tf +++ b/environments/.stackhpc/terraform/main.tf @@ -13,7 +13,7 @@ variable "cluster_name" { variable "cluster_image" { description = "single image for all cluster nodes - a convenience for CI" type = string - default = "openhpc-230926-1343-e3d3e307" # https://github.com/stackhpc/ansible-slurm-appliance/pull/314 + default = "openhpc-231020-1032-c89fa839" # https://github.com/stackhpc/ansible-slurm-appliance/pull/320 # default = "Rocky-8-GenericCloud-Base-8.7-20221130.0.x86_64.qcow2" # default = 
"Rocky-8-GenericCloud-8.6.20220702.0.x86_64.qcow2" } From d8286524fb0c1fe2216d416e3b4cf05f14634d98 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Fri, 20 Oct 2023 13:13:00 +0000 Subject: [PATCH 78/99] delete accidental file --- ansible/roles/cve-2023-41914/tasks/shutdown.yml | 1 - 1 file changed, 1 deletion(-) delete mode 100644 ansible/roles/cve-2023-41914/tasks/shutdown.yml diff --git a/ansible/roles/cve-2023-41914/tasks/shutdown.yml b/ansible/roles/cve-2023-41914/tasks/shutdown.yml deleted file mode 100644 index 70d40527d..000000000 --- a/ansible/roles/cve-2023-41914/tasks/shutdown.yml +++ /dev/null @@ -1 +0,0 @@ -- name: Shut down \ No newline at end of file From d35ef7f23ed1d205952938f476926458899195f4 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Fri, 20 Oct 2023 13:21:09 +0000 Subject: [PATCH 79/99] address PR comments --- ansible/fatimage.yml | 1 - .../cve-2023-41914/tasks/pre-upgrade.yml | 24 ++++++++++++------- .../roles/cve-2023-41914/tasks/validate.yml | 3 --- 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/ansible/fatimage.yml b/ansible/fatimage.yml index ae392dde3..511bc3b82 100644 --- a/ansible/fatimage.yml +++ b/ansible/fatimage.yml @@ -6,7 +6,6 @@ tasks: - name: Report hostname (= final image name) command: hostname - register: _hostname - name: Run pre.yml hook vars: diff --git a/ansible/roles/cve-2023-41914/tasks/pre-upgrade.yml b/ansible/roles/cve-2023-41914/tasks/pre-upgrade.yml index caf3f6f53..59629a482 100644 --- a/ansible/roles/cve-2023-41914/tasks/pre-upgrade.yml +++ b/ansible/roles/cve-2023-41914/tasks/pre-upgrade.yml @@ -1,14 +1,20 @@ -- name: Shut down slurm +- name: Stop slurmd systemd: - name: "{{ item }}" + name: slurmd state: stopped - register: _stop - failed_when: "'msg' in _stop and 'Could not find the requested service' not in _stop.msg" - # ignore_errors: true - loop: - - slurmd - - slurmctld - - slurmdbd + when: openhpc_enable.batch | default('false') | bool or 'login' in group_names + +- name: Stop 
slurmctld + systemd: + name: slurmctld + state: stopped + when: openhpc_enable.control | default('false') | bool + +- name: Stop slurmdbd + systemd: + name: slurmdbd + state: stopped + when: openhpc_enable.database | default('false') | bool - name: Ensure backup directory exists file: diff --git a/ansible/roles/cve-2023-41914/tasks/validate.yml b/ansible/roles/cve-2023-41914/tasks/validate.yml index 41a04c24e..5da1afdc2 100644 --- a/ansible/roles/cve-2023-41914/tasks/validate.yml +++ b/ansible/roles/cve-2023-41914/tasks/validate.yml @@ -5,9 +5,6 @@ # this is a subset (same format) as ansible_facts.packages set_fact: _cve_2023_41814_installed_pkgs: "{{ ansible_facts.packages | dict2items | selectattr('key', 'match', 'slurm-') | items2dict }}" - -# - debug: -# var: _cve_2023_41814_installed_pkgs - name: Ensure only a single version of all slurm-* packages is installed assert: From fe6ebaa703851d0562da37c564c17c27b04a9d7c Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Fri, 20 Oct 2023 13:51:36 +0000 Subject: [PATCH 80/99] simplify cve_2023_41914_rpms --- .../roles/cve-2023-41914/defaults/main.yml | 31 ++++++++++--------- .../cve-2023-41914/tasks/install-rpms.yml | 12 +++---- 2 files changed, 21 insertions(+), 22 deletions(-) diff --git a/ansible/roles/cve-2023-41914/defaults/main.yml b/ansible/roles/cve-2023-41914/defaults/main.yml index d8fbd05bd..685c6619c 100644 --- a/ansible/roles/cve-2023-41914/defaults/main.yml +++ b/ansible/roles/cve-2023-41914/defaults/main.yml @@ -2,22 +2,23 @@ # _cve_2023_41814_installed_slurm: [] cve_2023_41914_mysql_backup_path: "{{ mysql_datadir }}-backups/{{ lookup('pipe', 'date --iso-8601=seconds') }}.sql" -# slurm-contribs-ohpc-22.05.10-2.1.ohpc.2.6.2.x86_64.rpm cve_2023_41914_rpm_url: http://obs.openhpc.community:82/OpenHPC:/2.6.2:/Factory/EL_8/x86_64 cve_2023_41914_rpms: # see cve_2023_41914_rpm_url - slurm-ohpc: ['22.05.10', 2.1.ohpc.2.6.2] # has to be first as dependency - slurm-contribs-ohpc: ['22.05.10', '2.1.ohpc.2.6.2'] - 
slurm-devel-ohpc: ['22.05.10', '2.1.ohpc.2.6.2'] - slurm-example-configs-ohpc: ['22.05.10', '2.1.ohpc.2.6.2'] - slurm-libpmi-ohpc: ['22.05.10', '2.1.ohpc.2.6.2'] - slurm-ohpc-slurmrestd: ['22.05.10', '2.1.ohpc.2.6.2'] - slurm-openlava-ohpc: ['22.05.10', '2.1.ohpc.2.6.2'] - slurm-pam_slurm-ohpc: ['22.05.10', '2.1.ohpc.2.6.2'] - slurm-perlapi-ohpc: ['22.05.10', '2.1.ohpc.2.6.2'] - slurm-slurmctld-ohpc: ['22.05.10', '2.1.ohpc.2.6.2'] - slurm-slurmd-ohpc: ['22.05.10', '2.1.ohpc.2.6.2'] - slurm-slurmdbd-ohpc: ['22.05.10', '2.1.ohpc.2.6.2'] - slurm-sview-ohpc: ['22.05.10', '2.1.ohpc.2.6.2'] - slurm-torque-ohpc: ['22.05.10', '2.1.ohpc.2.6.2'] + - slurm-ohpc # has to be first as dependency + - slurm-contribs-ohpc + - slurm-devel-ohpc + - slurm-example-configs-ohpc + - slurm-libpmi-ohpc + - slurm-ohpc-slurmrestd + - slurm-openlava-ohpc + - slurm-pam_slurm-ohpc + - slurm-perlapi-ohpc + - slurm-slurmctld-ohpc + - slurm-slurmd-ohpc + - slurm-slurmdbd-ohpc + - slurm-sview-ohpc + - slurm-torque-ohpc +cve_2023_41914_rpm_fix_ver: '22.05.10' +cve_2023_41914_rpm_fix_release: '2.1.ohpc.2.6.2' _cve_2023_41814_updates: [] cve_2023_41914_pkglist_path: "{{ appliances_environment_root }}/{{ inventory_hostname }}-cve_2023_41814_updates" diff --git a/ansible/roles/cve-2023-41914/tasks/install-rpms.yml b/ansible/roles/cve-2023-41914/tasks/install-rpms.yml index 4815ea0fc..46bda94b9 100644 --- a/ansible/roles/cve-2023-41914/tasks/install-rpms.yml +++ b/ansible/roles/cve-2023-41914/tasks/install-rpms.yml @@ -1,12 +1,10 @@ - name: Identify packages to update set_fact: - _cve_2023_41814_updates: "{{ _cve_2023_41814_updates + [item.key] }}" - loop: "{{ cve_2023_41914_rpms | dict2items }}" - loop_control: - label: "{{ item.key }}" + _cve_2023_41814_updates: "{{ _cve_2023_41814_updates + [item] }}" + loop: "{{ cve_2023_41914_rpms }}" when: - - item.key in ansible_facts.packages - - item.value[0] is version(ansible_facts.packages[item.key][0].version, '>') + - item in ansible_facts.packages + - 
cve_2023_41914_rpm_fix_ver is version(ansible_facts.packages[item][0].version, '>') - name: Write packages to be modified to a file # allows recovery from failures in subsequent package deletion/rpm install @@ -31,7 +29,7 @@ - name: Install rpms dnf: - name: "{{ cve_2023_41914_rpm_url }}/{{ item }}-{{ cve_2023_41914_rpms[item] | join('-') }}.{{ ansible_architecture }}.rpm" + name: "{{ cve_2023_41914_rpm_url }}/{{ item }}-{{ cve_2023_41914_rpm_fix_ver }}-{{ cve_2023_41914_rpm_fix_release }}.{{ ansible_architecture }}.rpm" loop: "{{ _cve_2023_41814_updates }}" register: _cve_2023_41814_rpm_installs From b5d8b056cbe8bdb8f369a266755fe796d15bcb13 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Fri, 20 Oct 2023 13:55:35 +0000 Subject: [PATCH 81/99] run validate automatically when running install-rpms task --- ansible/roles/cve-2023-41914/README.md | 6 +----- ansible/roles/cve-2023-41914/tasks/install-rpms.yml | 4 ++++ environments/.stackhpc/hooks/post.yml | 4 ---- 3 files changed, 5 insertions(+), 9 deletions(-) diff --git a/ansible/roles/cve-2023-41914/README.md b/ansible/roles/cve-2023-41914/README.md index 9c2c282ec..02c650857 100644 --- a/ansible/roles/cve-2023-41914/README.md +++ b/ansible/roles/cve-2023-41914/README.md @@ -18,17 +18,13 @@ This role can be run in two ways: **NB**: This playbook will ALWAYS stop and restart Slurm, even if no updates are actually required. -2. To remediate images during build (i.e no Slurm services are running, no slurm database exists), run `tasks/validate.yml` then `tasks/install-rpms.yml`, e.g. using the following in an environment pre-hook: +2. To remediate images during build (i.e no Slurm services are running, no slurm database exists), run `tasks/install-rpms.yml`, e.g. 
using the following in an environment pre-hook: ```yaml - hosts: builder gather_facts: no become: yes tasks: - - name: Check fixes for cve-2023-41914 can be applied - import_role: - name: cve-2023-41914 - tasks_from: validate.yml - name: Apply fixes for cve-2023-41914 import_role: name: cve-2023-41914 diff --git a/ansible/roles/cve-2023-41914/tasks/install-rpms.yml b/ansible/roles/cve-2023-41914/tasks/install-rpms.yml index 46bda94b9..42168fd9b 100644 --- a/ansible/roles/cve-2023-41914/tasks/install-rpms.yml +++ b/ansible/roles/cve-2023-41914/tasks/install-rpms.yml @@ -1,3 +1,7 @@ +- name: Validate suitability + include_tasks: validate.yml + when: _cve_2023_41814_installed_pkgs is undefined + - name: Identify packages to update set_fact: _cve_2023_41814_updates: "{{ _cve_2023_41814_updates + [item] }}" diff --git a/environments/.stackhpc/hooks/post.yml b/environments/.stackhpc/hooks/post.yml index 22cdc38f7..2c479e087 100644 --- a/environments/.stackhpc/hooks/post.yml +++ b/environments/.stackhpc/hooks/post.yml @@ -2,10 +2,6 @@ gather_facts: no become: yes tasks: - - name: Check fixes for cve-2023-41914 can be applied - import_role: - name: cve-2023-41914 - tasks_from: validate.yml - name: Apply fixes for cve-2023-41914 import_role: name: cve-2023-41914 From 4c6aa82aa19fab7acdfb137444e213ff22d7317c Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Fri, 20 Oct 2023 14:38:08 +0000 Subject: [PATCH 82/99] bump image --- environments/.stackhpc/terraform/main.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environments/.stackhpc/terraform/main.tf b/environments/.stackhpc/terraform/main.tf index 77550a38f..ef8c1281e 100644 --- a/environments/.stackhpc/terraform/main.tf +++ b/environments/.stackhpc/terraform/main.tf @@ -13,7 +13,7 @@ variable "cluster_name" { variable "cluster_image" { description = "single image for all cluster nodes - a convenience for CI" type = string - default = "openhpc-231020-1032-c89fa839" # 
https://github.com/stackhpc/ansible-slurm-appliance/pull/320 + default = "openhpc-231020-1357-b5d8b056" # https://github.com/stackhpc/ansible-slurm-appliance/pull/320 # default = "Rocky-8-GenericCloud-Base-8.7-20221130.0.x86_64.qcow2" # default = "Rocky-8-GenericCloud-8.6.20220702.0.x86_64.qcow2" } From dd0c48b38151c9d52453a422281b94ef5b643ca2 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Fri, 27 Oct 2023 08:26:01 +0000 Subject: [PATCH 83/99] remove cve-2023-41914 hook for fatimage build now OpenHPC packages released --- environments/.stackhpc/hooks/post.yml | 8 -------- 1 file changed, 8 deletions(-) delete mode 100644 environments/.stackhpc/hooks/post.yml diff --git a/environments/.stackhpc/hooks/post.yml b/environments/.stackhpc/hooks/post.yml deleted file mode 100644 index 2c479e087..000000000 --- a/environments/.stackhpc/hooks/post.yml +++ /dev/null @@ -1,8 +0,0 @@ -- hosts: builder - gather_facts: no - become: yes - tasks: - - name: Apply fixes for cve-2023-41914 - import_role: - name: cve-2023-41914 - tasks_from: install-rpms.yml From 3a094bab0bfb4bc3ca6d60399de380371c2ddd5d Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Fri, 27 Oct 2023 08:26:57 +0000 Subject: [PATCH 84/99] bump fatimage source to Rocky8.8 to speedup build --- environments/.stackhpc/ARCUS.pkrvars.hcl | 2 +- environments/.stackhpc/SMS.pkrvars.hcl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/environments/.stackhpc/ARCUS.pkrvars.hcl b/environments/.stackhpc/ARCUS.pkrvars.hcl index a1ea7ef64..78f007753 100644 --- a/environments/.stackhpc/ARCUS.pkrvars.hcl +++ b/environments/.stackhpc/ARCUS.pkrvars.hcl @@ -4,7 +4,7 @@ volume_size = 10 # GB image_disk_format = "qcow2" networks = ["4b6b2722-ee5b-40ec-8e52-a6610e14cc51"] # portal-internal (DNS broken on ilab-60) source_image_name = "openhpc-230804-1754-80b8d714" # https://github.com/stackhpc/ansible-slurm-appliance/pull/298 -fatimage_source_image_name = "Rocky-8-GenericCloud-8.6.20220702.0.x86_64.qcow2" 
+fatimage_source_image_name = "Rocky-8-GenericCloud-Base-8.8-20230518.0.x86_64.qcow2" ssh_keypair_name = "slurm-app-ci" ssh_private_key_file = "~/.ssh/id_rsa" security_groups = ["default", "SSH"] diff --git a/environments/.stackhpc/SMS.pkrvars.hcl b/environments/.stackhpc/SMS.pkrvars.hcl index 936e3b736..a62d79929 100644 --- a/environments/.stackhpc/SMS.pkrvars.hcl +++ b/environments/.stackhpc/SMS.pkrvars.hcl @@ -1,7 +1,7 @@ flavor = "general.v1.tiny" networks = ["26023e3d-bc8e-459c-8def-dbd47ab01756"] # stackhpc-ipv4-geneve source_image_name = "openhpc-230503-0944-bf8c3f63" # https://github.com/stackhpc/ansible-slurm-appliance/pull/252 -fatimage_source_image_name = "Rocky-8-GenericCloud-8.6.20220702.0.x86_64.qcow2" +fatimage_source_image_name = "Rocky-8-GenericCloud-Base-8.8-20230518.0.x86_64.qcow2" ssh_keypair_name = "slurm-app-ci" ssh_private_key_file = "~/.ssh/id_rsa" security_groups = ["default", "SSH"] From 893570de7e1fd4d6a2e01d74e2b043f1ae01361b Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Fri, 27 Oct 2023 09:14:00 +0000 Subject: [PATCH 85/99] WIP: test new ohpc version --- requirements.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.yml b/requirements.yml index 93be4c9f9..ffda3b4fe 100644 --- a/requirements.yml +++ b/requirements.yml @@ -3,7 +3,7 @@ roles: - src: stackhpc.nfs version: v22.9.1 - src: https://github.com/stackhpc/ansible-role-openhpc.git - version: v0.20.0 # Allow multiple empty partitions by @sjpb in #156 + version: fix/singularity-ce # TODO: bump when merged name: stackhpc.openhpc - src: https://github.com/stackhpc/ansible-node-exporter.git version: feature/no-install From 46ca110025938dbc34d66ce9e64eb00162ec3ee0 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Fri, 27 Oct 2023 09:49:08 +0000 Subject: [PATCH 86/99] bump image --- environments/.stackhpc/terraform/main.tf | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/environments/.stackhpc/terraform/main.tf 
b/environments/.stackhpc/terraform/main.tf index ef8c1281e..07a42a915 100644 --- a/environments/.stackhpc/terraform/main.tf +++ b/environments/.stackhpc/terraform/main.tf @@ -13,9 +13,8 @@ variable "cluster_name" { variable "cluster_image" { description = "single image for all cluster nodes - a convenience for CI" type = string - default = "openhpc-231020-1357-b5d8b056" # https://github.com/stackhpc/ansible-slurm-appliance/pull/320 - # default = "Rocky-8-GenericCloud-Base-8.7-20221130.0.x86_64.qcow2" - # default = "Rocky-8-GenericCloud-8.6.20220702.0.x86_64.qcow2" + default = "openhpc-231027-0916-893570de" # https://github.com/stackhpc/ansible-slurm-appliance/pull/324 + # default = "Rocky-8-GenericCloud-Base-8.8-20230518.0.x86_64.qcow2" } variable "cluster_net" {} From 6e561c5b24b6e9520389c333f649860d2c678479 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Fri, 27 Oct 2023 09:57:19 +0000 Subject: [PATCH 87/99] move EESSI to extras --- ansible/bootstrap.yml | 10 ---------- ansible/extras.yml | 13 ++++++++++++- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/ansible/bootstrap.yml b/ansible/bootstrap.yml index eb480f926..485c637f5 100644 --- a/ansible/bootstrap.yml +++ b/ansible/bootstrap.yml @@ -112,16 +112,6 @@ tasks_from: config.yml tags: config -- name: Setup EESSI - hosts: eessi - tags: eessi - become: true - gather_facts: false - tasks: - - name: Install and configure EESSI - import_role: - name: eessi - - hosts: update gather_facts: false become: yes diff --git a/ansible/extras.yml b/ansible/extras.yml index 28dbb46cb..efb39f40f 100644 --- a/ansible/extras.yml +++ b/ansible/extras.yml @@ -1,4 +1,15 @@ -- hosts: cuda +- name: Setup EESSI + hosts: eessi + tags: eessi + become: true + gather_facts: false + tasks: + - name: Install and configure EESSI + import_role: + name: eessi + +- name: Setup CUDA + hosts: cuda become: yes gather_facts: no tags: cuda From 85448b832c3fbcbaa99158de199b2b2eb44a598e Mon Sep 17 00:00:00 2001 From: Steve Brasier 
Date: Fri, 27 Oct 2023 10:23:20 +0000 Subject: [PATCH 88/99] fix stackhpc env when running from genericcloud image --- environments/.stackhpc/hooks/post-bootstrap.yml | 16 ++++++++++++++++ environments/.stackhpc/hooks/pre.yml | 17 ----------------- 2 files changed, 16 insertions(+), 17 deletions(-) create mode 100644 environments/.stackhpc/hooks/post-bootstrap.yml diff --git a/environments/.stackhpc/hooks/post-bootstrap.yml b/environments/.stackhpc/hooks/post-bootstrap.yml new file mode 100644 index 000000000..fe783e469 --- /dev/null +++ b/environments/.stackhpc/hooks/post-bootstrap.yml @@ -0,0 +1,16 @@ +- hosts: podman:!builder + become: yes + gather_facts: false + tags: podman + tasks: + - name: Configure container image registry for unqualified searches to avoid docker.io ratelimits + copy: + dest: /etc/containers/registries.conf.d/003-arcus-unqualfied-overrides.conf + content: | + unqualified-search-registries = ['{{ podman_registry_address | split('/') | first }}', 'registry.access.redhat.com', 'registry.redhat.io', 'docker.io'] + + [[registry]] + prefix = "{{ podman_registry_address }}" + location = "{{ podman_registry_address }}" + insecure = true + when: "ci_cloud == 'ARCUS'" diff --git a/environments/.stackhpc/hooks/pre.yml b/environments/.stackhpc/hooks/pre.yml index 8f97dbf88..d26045f2d 100644 --- a/environments/.stackhpc/hooks/pre.yml +++ b/environments/.stackhpc/hooks/pre.yml @@ -15,20 +15,3 @@ - "{{ lookup('env', 'APPLIANCES_ENVIRONMENT_ROOT') }}/inventory/hosts" - "{{ lookup('env', 'APPLIANCES_ENVIRONMENT_ROOT') }}/inventory/group_vars/all/secrets.yml" - "{{ lookup('env', 'APPLIANCES_ENVIRONMENT_ROOT') }}/inventory/group_vars/all/test_user.yml" - -- hosts: all:!builder - become: yes - gather_facts: false - tags: podman - tasks: - - name: Configure container image registry for unqualified searches to avoid docker.io ratelimits - copy: - dest: /etc/containers/registries.conf.d/003-arcus-unqualfied-overrides.conf - content: | - 
unqualified-search-registries = ['{{ podman_registry_address | split('/') | first }}', 'registry.access.redhat.com', 'registry.redhat.io', 'docker.io'] - - [[registry]] - prefix = "{{ podman_registry_address }}" - location = "{{ podman_registry_address }}" - insecure = true - when: "ci_cloud == 'ARCUS'" From b3b6f44a8bde4a3c9a7471e547ffb3dd5332708d Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Fri, 27 Oct 2023 12:53:37 +0000 Subject: [PATCH 89/99] bump openhpc role after merge --- requirements.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.yml b/requirements.yml index ffda3b4fe..3f65a27ac 100644 --- a/requirements.yml +++ b/requirements.yml @@ -3,7 +3,7 @@ roles: - src: stackhpc.nfs version: v22.9.1 - src: https://github.com/stackhpc/ansible-role-openhpc.git - version: fix/singularity-ce # TODO: bump when merged + version: v0.23.0 # https://github.com/stackhpc/ansible-role-openhpc/pull/165 name: stackhpc.openhpc - src: https://github.com/stackhpc/ansible-node-exporter.git version: feature/no-install From 89d282dc52c7531dcfb73a635b429cbfb7fa1a2d Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Wed, 1 Nov 2023 10:11:42 +0000 Subject: [PATCH 90/99] move freeipa_server validation so it runs --- ansible/roles/freeipa/tasks/server.yml | 11 +++++++++++ ansible/roles/freeipa/tasks/validate.yml | 13 ------------- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/ansible/roles/freeipa/tasks/server.yml b/ansible/roles/freeipa/tasks/server.yml index 2f14e1a6f..f79534f27 100644 --- a/ansible/roles/freeipa/tasks/server.yml +++ b/ansible/roles/freeipa/tasks/server.yml @@ -1,5 +1,16 @@ # Based on https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/8/html/installing_identity_management/preparing-the-system-for-ipa-server-installation_installing-identity-management#host-name-and-dns-requirements-for-ipa_preparing-the-system-for-ipa-server-installation +- name: Get hostname as reported by command + command: 
hostname + register: _freeipa_validate_hostname + changed_when: false + +- name: Ensure hostname is fully-qualified + # see section 2.7 of redhat guide to installing identity management + assert: + that: _freeipa_validate_hostname.stdout | split('.') | length >= 3 + fail_msg: "freeipa_server hostname '{{ _freeipa_validate_hostname.stdout }}' is not fully-qualified (a.b.c)" + - name: Install freeipa server packages dnf: name: '@idm:DL1/dns' diff --git a/ansible/roles/freeipa/tasks/validate.yml b/ansible/roles/freeipa/tasks/validate.yml index a7806606a..90fbcf328 100644 --- a/ansible/roles/freeipa/tasks/validate.yml +++ b/ansible/roles/freeipa/tasks/validate.yml @@ -1,16 +1,3 @@ -- name: Get hostname as reported by command - command: hostname - register: _freeipa_validate_hostname - changed_when: false - when: "'freeipa_server' in group_names" - -- name: Ensure hostname is fully-qualified - # see section 2.7 of redhat guide to installing identity management - assert: - that: _freeipa_validate_hostname.stdout | split('.') | length >= 3 - fail_msg: "FreeIPA server hostname '{{ _freeipa_validate_hostname.stdout }}' is not fully-qualified (a.b.c)" - when: "'freeipa_server' in group_names" - - name: Ensure control node has persistent storage defined assert: that: "{{ 'appliances_state_dir' in hostvars[groups['control'] | first ] }}" From 014c70dc52a36f00ff2b5807d3349449ecc02d2e Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Wed, 1 Nov 2023 10:12:18 +0000 Subject: [PATCH 91/99] add check for virtual servers in freeipa_server --- ansible/roles/freeipa/tasks/server.yml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/ansible/roles/freeipa/tasks/server.yml b/ansible/roles/freeipa/tasks/server.yml index f79534f27..401ffaca2 100644 --- a/ansible/roles/freeipa/tasks/server.yml +++ b/ansible/roles/freeipa/tasks/server.yml @@ -11,6 +11,22 @@ that: _freeipa_validate_hostname.stdout | split('.') | length >= 3 fail_msg: "freeipa_server hostname '{{ 
_freeipa_validate_hostname.stdout }}' is not fully-qualified (a.b.c)" +- name: Check for virtual servers in httpd configuration of freeipa_server + # e.g. fatimage with OOD config; community.general.ipa_host fails with "401 Unauthorized: No session cookie found" + # https://lists.fedoraproject.org/archives/list/freeipa-users@lists.fedorahosted.org/message/7RH7XDFR35KDPYJ7AQCQI2H2EOWIZCWA/ + find: + path: /etc/httpd/conf.d/ + contains: ' Date: Wed, 1 Nov 2023 10:18:31 +0000 Subject: [PATCH 92/99] move freeipa validation back to validate task --- ansible/roles/freeipa/tasks/server.yml | 27 --------------------- ansible/roles/freeipa/tasks/validate.yml | 31 ++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 27 deletions(-) diff --git a/ansible/roles/freeipa/tasks/server.yml b/ansible/roles/freeipa/tasks/server.yml index 401ffaca2..01a7c039f 100644 --- a/ansible/roles/freeipa/tasks/server.yml +++ b/ansible/roles/freeipa/tasks/server.yml @@ -1,32 +1,5 @@ # Based on https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/8/html/installing_identity_management/preparing-the-system-for-ipa-server-installation_installing-identity-management#host-name-and-dns-requirements-for-ipa_preparing-the-system-for-ipa-server-installation -- name: Get hostname as reported by command - command: hostname - register: _freeipa_validate_hostname - changed_when: false - -- name: Ensure hostname is fully-qualified - # see section 2.7 of redhat guide to installing identity management - assert: - that: _freeipa_validate_hostname.stdout | split('.') | length >= 3 - fail_msg: "freeipa_server hostname '{{ _freeipa_validate_hostname.stdout }}' is not fully-qualified (a.b.c)" - -- name: Check for virtual servers in httpd configuration of freeipa_server - # e.g. 
fatimage with OOD config; community.general.ipa_host fails with "401 Unauthorized: No session cookie found" - # https://lists.fedoraproject.org/archives/list/freeipa-users@lists.fedorahosted.org/message/7RH7XDFR35KDPYJ7AQCQI2H2EOWIZCWA/ - find: - path: /etc/httpd/conf.d/ - contains: '= 3 + fail_msg: "freeipa_server hostname '{{ _freeipa_validate_hostname.stdout }}' is not fully-qualified (a.b.c)" + when: "'freeipa_server' in group_names" + +- name: Check for virtual servers in httpd configuration of freeipa_server + # e.g. fatimage with OOD config; community.general.ipa_host fails with "401 Unauthorized: No session cookie found" + # https://lists.fedoraproject.org/archives/list/freeipa-users@lists.fedorahosted.org/message/7RH7XDFR35KDPYJ7AQCQI2H2EOWIZCWA/ + find: + path: /etc/httpd/conf.d/ + contains: ' Date: Wed, 1 Nov 2023 10:20:08 +0000 Subject: [PATCH 93/99] don't log freeipa server passwords --- ansible/roles/freeipa/tasks/server.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ansible/roles/freeipa/tasks/server.yml b/ansible/roles/freeipa/tasks/server.yml index 01a7c039f..33e15733d 100644 --- a/ansible/roles/freeipa/tasks/server.yml +++ b/ansible/roles/freeipa/tasks/server.yml @@ -23,8 +23,7 @@ --no-ntp --unattended --no-ui-redirect - # TODO: add no_log here as password exposed - + no_log: "{{ no_log | default(true) }}" register: _ipa_server_install changed_when: _ipa_server_install.rc == 0 failed_when: > From 5618d4efb206945981d1a6f638a99596a049b28f Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Wed, 1 Nov 2023 10:20:33 +0000 Subject: [PATCH 94/99] fix freeipa testuser password to match normal CI/basic_users_users usage --- environments/.stackhpc/inventory/group_vars/all/freeipa.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/environments/.stackhpc/inventory/group_vars/all/freeipa.yml b/environments/.stackhpc/inventory/group_vars/all/freeipa.yml index 030c86db6..4b3750650 100644 --- 
a/environments/.stackhpc/inventory/group_vars/all/freeipa.yml +++ b/environments/.stackhpc/inventory/group_vars/all/freeipa.yml @@ -2,8 +2,8 @@ # NB: Users defined this way have expired passwords freeipa_users: - - name: freeipatestuser # can't use rocky as $HOME isn't shared! - password: "{{ vault_freeipatestuser_password }}" + - name: testuser # can't use rocky as $HOME isn't shared! + password: "{{ test_user_password }}" givenname: test sn: test From 3708a5cc96e20f5c0db89cc70b13dbea91c0da6c Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Wed, 1 Nov 2023 10:38:37 +0000 Subject: [PATCH 95/99] add note re freeipa server incompatibility with other virtual servers --- ansible/roles/freeipa/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/freeipa/README.md b/ansible/roles/freeipa/README.md index 2cc27095c..2c9e6c113 100644 --- a/ansible/roles/freeipa/README.md +++ b/ansible/roles/freeipa/README.md @@ -41,7 +41,7 @@ Support FreeIPA in the appliance. In production use it is expected the FreeIPA s See also use of `appliances_state_dir` on the control node as described above. # FreeIPA Server -As noted above this is only intended for development and testing. +As noted above this is only intended for development and testing. Note it cannot be run on the `openondemand` node as no other virtual servers must be defined in the Apache configuration. ## Usage - Add a single host to the `freeipa_server` group and run (at a minimum) the `ansible/bootstrap.yml` and `ansible/iam.yml` playbooks. 
From 4febcc65217c5fcf6cd9bf130be7a84f36ce78c3 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Thu, 2 Nov 2023 09:56:20 +0000 Subject: [PATCH 96/99] tweak freeipa names --- ansible/bootstrap.yml | 2 +- ansible/fatimage.yml | 2 +- ansible/iam.yml | 10 +++++----- ansible/roles/freeipa/tasks/{enrole.yml => enrol.yml} | 0 4 files changed, 7 insertions(+), 7 deletions(-) rename ansible/roles/freeipa/tasks/{enrole.yml => enrol.yml} (100%) diff --git a/ansible/bootstrap.yml b/ansible/bootstrap.yml index 1f7356fd6..e3baf1e51 100644 --- a/ansible/bootstrap.yml +++ b/ansible/bootstrap.yml @@ -90,7 +90,7 @@ gather_facts: yes become: yes tasks: - - name: Install freeipa server + - name: Install FreeIPA server import_role: name: freeipa tasks_from: server.yml diff --git a/ansible/fatimage.yml b/ansible/fatimage.yml index f3fe0bcce..ff27e34f3 100644 --- a/ansible/fatimage.yml +++ b/ansible/fatimage.yml @@ -35,7 +35,7 @@ enabled: false # - import_playbook: iam.yml - - name: Install freeipa client + - name: Install FreeIPA client import_role: name: freeipa tasks_from: client-install.yml diff --git a/ansible/iam.yml b/ansible/iam.yml index 988e96093..0286b9df3 100644 --- a/ansible/iam.yml +++ b/ansible/iam.yml @@ -6,7 +6,7 @@ gather_facts: no become: yes tasks: - - name: Add freeipa hosts + - name: Ensure FreeIPA client hosts are added to the FreeIPA server import_role: name: freeipa tasks_from: addhost.yml @@ -19,14 +19,14 @@ gather_facts: yes become: yes tasks: - - name: Install freeipa client + - name: Install FreeIPA client import_role: name: freeipa tasks_from: client-install.yml - - name: Enrole freeipa clients + - name: Enrol FreeIPA client import_role: name: freeipa - tasks_from: enrole.yml + tasks_from: enrol.yml - hosts: freeipa_server tags: @@ -36,7 +36,7 @@ gather_facts: yes become: yes tasks: - - name: Add freeipa users + - name: Add FreeIPA users import_role: name: freeipa tasks_from: users.yml diff --git a/ansible/roles/freeipa/tasks/enrole.yml 
b/ansible/roles/freeipa/tasks/enrol.yml similarity index 100% rename from ansible/roles/freeipa/tasks/enrole.yml rename to ansible/roles/freeipa/tasks/enrol.yml From 6c07eca37e6d65cbea6e9b51af3dc33ca8d3e3c6 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Thu, 2 Nov 2023 10:15:46 +0000 Subject: [PATCH 97/99] freeipa README nits --- ansible/roles/freeipa/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ansible/roles/freeipa/README.md b/ansible/roles/freeipa/README.md index 2c9e6c113..4247a7d1d 100644 --- a/ansible/roles/freeipa/README.md +++ b/ansible/roles/freeipa/README.md @@ -8,7 +8,7 @@ Support FreeIPA in the appliance. In production use it is expected the FreeIPA s ## Usage - Add hosts to the `freeipa_client` group and run (at a minimum) the `ansible/iam.yml` playbook. - Host names must match the domain name. By default (using the skeleton Terraform) hostnames are of the form `nodename.cluster_name.cluster_domain_suffix` where `cluster_name` and `cluster_domain_suffix` are Terraform variables. -- Hosts discover the FreeIPA server FQDN (and their own domain) from DNS records. If this is not set from DHCP, then use the `resolv_conf` role to configure this. For example when using the in-appliance FreeIPA development server,: +- Hosts discover the FreeIPA server FQDN (and their own domain) from DNS records. If DNS servers are not set this is not set from DHCP, then use the `resolv_conf` role to configure this. For example when using the in-appliance FreeIPA development server: ```ini # environments//groups @@ -25,9 +25,9 @@ Support FreeIPA in the appliance. In production use it is expected the FreeIPA s ``` -- For production use with an external FreeIPA server, a random one-time password (OTP) must be generated when adding hosts to FreeIPA (e.g. using `ipa host-add --random ...`). This password should be set as a hostvar `freeipa_host_password`. Initial host enrolment will use this OTP to enrole the host. 
After this it becomes irrelevant so it does not need to be committed to git. This approach means the appliance does not require the FreeIPA administrator password. +- For production use with an external FreeIPA server, a random one-time password (OTP) must be generated when adding hosts to FreeIPA (e.g. using `ipa host-add --random ...`). This password should be set as a hostvar `freeipa_host_password`. Initial host enrolment will use this OTP to enrol the host. After this it becomes irrelevant so it does not need to be committed to git. This approach means the appliance does not require the FreeIPA administrator password. - For development use with the in-appliance FreeIPA server, `freeipa_host_password` will be automatically generated in memory. -- The `control` host must define `appliances_state_dir` (on persistent storage). This is used to back-up keytabs to allow FreeIPA clients to automatically reenroll after e.g. reimaging. Note that: +- The `control` host must define `appliances_state_dir` (on persistent storage). This is used to back-up keytabs to allow FreeIPA clients to automatically re-enrol after e.g. reimaging. Note that: - This is implemented when using the skeleton Terraform; on the control node `appliances_state_dir` defaults to `/var/lib/state` which is mounted from a volume. - Nodes are not re-enroled by a [Slurm-driven reimage](../../collections/ansible_collections/stackhpc/slurm_openstack_tools/roles/rebuild/README.md) (as that does not run this role). - If both a backed-up keytab and `freeipa_host_password` exist, the former is used. 
From a35ea269ff7df8f71a022f9ab329dcdee5431f10 Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Thu, 2 Nov 2023 10:16:08 +0000 Subject: [PATCH 98/99] freeipa editorial comments --- ansible/roles/freeipa/tasks/enrol.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ansible/roles/freeipa/tasks/enrol.yml b/ansible/roles/freeipa/tasks/enrol.yml index 63ca44ae1..07436509b 100644 --- a/ansible/roles/freeipa/tasks/enrol.yml +++ b/ansible/roles/freeipa/tasks/enrol.yml @@ -16,9 +16,9 @@ mode: ug=rw,o= when: '"content" in _slurp_persisted_keytab' -- name: Renrole with FreeIPA using backed-up keytab - # reenrolment requires --force-join and --password, or --keytab - # renrolement means: +- name: Re-enrol with FreeIPA using backed-up keytab + # Re-enrolment requires --force-join and --password, or --keytab + # Re-rolement means: # 1. A new host certificate is issued # 2. The old host certificate is revoked # 3. New SSH keys are generated @@ -38,7 +38,7 @@ ipa_client_install_keytab.rc !=0 and 'IPA client is already configured' not in ipa_client_install_keytab.stderr -- name: Enrole with FreeIPA using random password +- name: Enrol with FreeIPA using random password # Note --password is overloaded - it's bulkpassword unless --principal or --force-join is used in which case it's admin password command: cmd: > From 966d35087f02bb90e406a7d81d24e9bb0621dc4c Mon Sep 17 00:00:00 2001 From: Steve Brasier Date: Thu, 2 Nov 2023 10:36:39 +0000 Subject: [PATCH 99/99] remove argsplat from FreeIPA users task --- ansible/roles/freeipa/README.md | 4 ++-- ansible/roles/freeipa/tasks/users.yml | 25 ++++++++++++++++++++++++- 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/ansible/roles/freeipa/README.md b/ansible/roles/freeipa/README.md index 4247a7d1d..5f2f377b6 100644 --- a/ansible/roles/freeipa/README.md +++ b/ansible/roles/freeipa/README.md @@ -62,7 +62,7 @@ These role variables are only required when using `freeipa_server`: - `freeipa_server_ip`: 
Optional, IP address of freeipa_server host. Default is `ansible_host` of the `freeipa_server` host. Default `false`. - `freeipa_setup_dns`: Optional bool, whether to configure the FreeIPA server as an integrated DNS server and define a zone and records. NB: This also controls whether `freeipa_client` hosts use the `freeipa_server` host for name resolution. Default `true` when `freeipa_server` contains a host. - `freeipa_client_ip`: Optional, IP address of FreeIPA client. Default is `ansible_host`. -- `freeipa_users`: A list of dicts as per parameters for [community.general.ipa_user](https://docs.ansible.com/ansible/latest/collections/community/general/ipa_user_module.html). Note that: +- `freeipa_users`: A list of dicts defining users to add, with keys/values as for [community.general.ipa_user](https://docs.ansible.com/ansible/latest/collections/community/general/ipa_user_module.html): Note that: - `name`, `givenname` (firstname) and `sn` (surname) are required. - - `ipa_pass` and `ipa_user` are automatically supplied. + - `ipa_host`, `ipa_port`, `ipa_prot`, `ipa_user`, `validate_certs` are automatically provided and cannot be overridden. - If `password` is set, the value should *not* be a hash (unlike `ansible.builtin.user` as used by the `basic_users` role), and it must be changed on first login. `krbpasswordexpiration` does not appear to be able to override this. 
diff --git a/ansible/roles/freeipa/tasks/users.yml b/ansible/roles/freeipa/tasks/users.yml index a6be8392c..bd1cacad3 100644 --- a/ansible/roles/freeipa/tasks/users.yml +++ b/ansible/roles/freeipa/tasks/users.yml @@ -1,4 +1,27 @@ - name: Add users to freeipa # This uses DNS to find the ipa server, which works as this is running on the enrolled ipa server - community.general.ipa_user: "{{ freeipa_user_defaults | combine(item) }}" + community.general.ipa_user: + displayname: "{{ item.displayname | default(omit) }}" + gidnumber: "{{ item.gidnumber | default(omit) }}" + givenname: "{{ item.givenname }}" + #ipa_host + ipa_pass: "{{ freeipa_admin_password | quote }}" + #ipa_port + #ipa_prot + ipa_timeout: "{{ item.ipa_timeout | default(omit) }}" + #ipa_user + krbpasswordexpiration: "{{ item.krbpasswordexpiration | default(omit) }}" + loginshell: "{{ item.loginshell | default(omit) }}" + mail: "{{ item.mail | default(omit) }}" + password: "{{ item.password | default(omit) }}" + sn: "{{ item.sn }}" + sshpubkey: "{{ item.sshpubkey | default(omit) }}" + state: "{{ item.state | default(omit) }}" + telephonenumber: "{{ item.telephonenumber | default(omit) }}" + title: "{{ item.title | default(omit) }}" + uid: "{{ item.name | default(item.uid) }}" + uidnumber: "{{ item.uidnumber | default(omit) }}" + update_password: "{{ item.update_password | default(omit) }}" + userauthtype: "{{ item.userauthtype | default(omit) }}" + #validate_certs loop: "{{ freeipa_users }}"