diff --git a/ansible/fatimage.yml b/ansible/fatimage.yml index ff27e34f3..cf23fa526 100644 --- a/ansible/fatimage.yml +++ b/ansible/fatimage.yml @@ -52,7 +52,11 @@ become: yes gather_facts: no tasks: - # - import_playbook: slurm.yml + # - import_playbook: slurm.yml: + - name: Setup DB + include_role: + name: mysql + tasks_from: install.yml - name: OpenHPC import_role: name: stackhpc.openhpc @@ -77,10 +81,10 @@ name: opensearch tasks_from: install.yml become: true - - # opensearch - containerised, nothing to do # slurm_stats - nothing to do - # filebeat - containerised - nothing to do + - import_role: + name: filebeat + tasks_from: install.yml - import_role: # can't only run cloudalchemy.node_exporter/tasks/install.yml as needs vars from preflight.yml and triggers service start diff --git a/ansible/monitoring.yml b/ansible/monitoring.yml index b8d5fc0a5..84f319688 100644 --- a/ansible/monitoring.yml +++ b/ansible/monitoring.yml @@ -26,19 +26,12 @@ # Collection currently requires root for all tasks. become: true -- name: Setup filebeat +- name: Deploy filebeat hosts: filebeat tags: filebeat tasks: - import_role: name: filebeat - tasks_from: config.yml - tags: config - - - import_role: - name: filebeat - tasks_from: deploy.yml - tags: deploy - name: Deploy node_exporter hosts: node_exporter diff --git a/ansible/roles/filebeat/tasks/deploy.yml b/ansible/roles/filebeat/tasks/deploy.yml deleted file mode 100644 index aa4f46f32..000000000 --- a/ansible/roles/filebeat/tasks/deploy.yml +++ /dev/null @@ -1,7 +0,0 @@ ---- -- name: Create systemd unit file - template: - dest: /etc/systemd/system/filebeat.service - src: filebeat.service.j2 - become: true - notify: Restart filebeat container diff --git a/ansible/roles/filebeat/tasks/install.yml b/ansible/roles/filebeat/tasks/install.yml new file mode 100644 index 000000000..8e64722ec --- /dev/null +++ b/ansible/roles/filebeat/tasks/install.yml @@ -0,0 +1,17 @@ +--- +- name: Create systemd unit file + template: + dest: /etc/systemd/system/filebeat.service + src: filebeat.service.j2 + become: true + register: _filebeat_unit + +- name: Pull container image + containers.podman.podman_image: + name: "docker.elastic.co/beats/filebeat-oss" + tag: "{{ filebeat_version }}" + become_user: "{{ filebeat_podman_user }}" + +- name: Reload filebeat unit file + command: systemctl daemon-reload + when: _filebeat_unit.changed diff --git a/ansible/roles/filebeat/tasks/main.yml b/ansible/roles/filebeat/tasks/main.yml new file mode 100644 index 000000000..849683c38 --- /dev/null +++ b/ansible/roles/filebeat/tasks/main.yml @@ -0,0 +1,2 @@ +- import_tasks: install.yml +- import_tasks: runtime.yml diff --git a/ansible/roles/filebeat/tasks/post.yml b/ansible/roles/filebeat/tasks/post.yml deleted file mode 100644 index 73b314ff7..000000000 --- a/ansible/roles/filebeat/tasks/post.yml +++ /dev/null @@ -1 +0,0 @@ ---- \ No newline at end of file diff --git a/ansible/roles/filebeat/tasks/config.yml b/ansible/roles/filebeat/tasks/runtime.yml similarity index 82% rename from ansible/roles/filebeat/tasks/config.yml rename to ansible/roles/filebeat/tasks/runtime.yml index 1e454347e..119745096 100644 --- a/ansible/roles/filebeat/tasks/config.yml +++ b/ansible/roles/filebeat/tasks/runtime.yml @@ -27,3 +27,13 @@ mode: 0600 notify: Restart filebeat container become: true + +- name: Flush handlers + meta: flush_handlers + +- name: Ensure filebeat service state + systemd: + name: filebeat.service + state: started + enabled: true + become: true diff --git a/ansible/roles/mysql/tasks/install.yml b/ansible/roles/mysql/tasks/install.yml index 7e19c7726..4427b7d18 100644 --- a/ansible/roles/mysql/tasks/install.yml +++ b/ansible/roles/mysql/tasks/install.yml @@ -8,3 +8,9 @@ dest: /etc/systemd/system/mysql.service src: mysql.service.j2 register: _mysql_unitfile + +- name: Pull container image + containers.podman.podman_image: + name: "mysql" + tag: "{{ mysql_tag }}" + become_user: "{{ mysql_podman_user }}" diff --git a/ansible/roles/opensearch/tasks/install.yml b/ansible/roles/opensearch/tasks/install.yml index 902c71d1f..81547e5a0 100644 --- a/ansible/roles/opensearch/tasks/install.yml +++ b/ansible/roles/opensearch/tasks/install.yml @@ -14,6 +14,12 @@ src: opensearch.service.j2 register: _opensearch_unit +- name: Pull container image + containers.podman.podman_image: + name: "opensearchproject/opensearch" + tag: "{{ opensearch_version }}" + become_user: "{{ opensearch_podman_user }}" + - name: Reload opensearch unit file command: systemctl daemon-reload when: _opensearch_unit.changed diff --git a/ansible/roles/opensearch/tasks/runtime.yml b/ansible/roles/opensearch/tasks/runtime.yml index b2cdeb456..7fe197abe 100644 --- a/ansible/roles/opensearch/tasks/runtime.yml +++ b/ansible/roles/opensearch/tasks/runtime.yml @@ -74,11 +74,6 @@ notify: Restart opensearch service become: true -- name: Pull container - containers.podman.podman_image: - name: "opensearchproject/opensearch:{{ opensearch_version }}" - become_user: "{{ opensearch_podman_user }}" - - name: Flush handlers meta: flush_handlers diff --git a/ansible/roles/podman/tasks/config.yml b/ansible/roles/podman/tasks/config.yml index 05dc8f757..86b8716b0 100644 --- a/ansible/roles/podman/tasks/config.yml +++ b/ansible/roles/podman/tasks/config.yml @@ -34,52 +34,3 @@ with_items: "{{ podman_users }}" register: podman_user_info become: yes - -- name: Define tmp directories on tmpfs - blockinfile: - path: /etc/tmpfiles.d/podman.conf - create: yes - block: | - d {{ podman_tmp_dir_root }}/{{ item.name }}/libpod/tmp 0755 {{ item.name }} {{ item.name }} - Z {{ podman_tmp_dir_root }}/{{ item.name }} 0755 {{ item.name }} {{ item.name }} - become: yes - loop: "{{ podman_users }}" - register: podman_tmp_dirs - -- name: Create tmp directories - command: systemd-tmpfiles --create - become: true - when: podman_tmp_dirs.results | selectattr('changed') | list | length > 0 # when: any changed - -- name: Create podman configuration directories - file: - path: "{{ item.home }}/.config/containers/" - state: directory - owner: "{{ item.name }}" - group: "{{ item.name }}" - become: yes - loop: "{{ podman_user_info.results }}" - -- name: Set podman to use temp directories - community.general.ini_file: - path: "{{ item.home }}/.config/containers/containers.conf" - section: engine - option: tmp_dir - value: '"{{ podman_tmp_dir_root }}/{{ item.name }}/libpod/tmp"' - owner: "{{ item.name }}" - group: "{{ item.name }}" - create: yes - loop: "{{ podman_user_info.results }}" - become: yes - register: podman_tmp - -- name: Reset podman database - # otherwise old config overrides! - command: - cmd: podman system reset --force - become: yes - become_user: "{{ item.item.name }}" - when: item.changed - loop: "{{ podman_tmp.results }}" - loop_control: - label: "{{ item.item.name }}" diff --git a/environments/.stackhpc/ARCUS.pkrvars.hcl b/environments/.stackhpc/ARCUS.pkrvars.hcl index b77318a3d..2b1bbfb39 100644 --- a/environments/.stackhpc/ARCUS.pkrvars.hcl +++ b/environments/.stackhpc/ARCUS.pkrvars.hcl @@ -1,6 +1,6 @@ flavor = "vm.ska.cpu.general.small" use_blockstorage_volume = true -volume_size = 10 # GB +volume_size = 12 # GB. Compatible with SMS-lab's general.v1.tiny image_disk_format = "qcow2" networks = ["4b6b2722-ee5b-40ec-8e52-a6610e14cc51"] # portal-internal (DNS broken on ilab-60) source_image_name = "openhpc-230804-1754-80b8d714" # https://github.com/stackhpc/ansible-slurm-appliance/pull/298 @@ -8,5 +8,4 @@ fatimage_source_image_name = "Rocky-8-GenericCloud-Base-8.9-20231119.0.x86_64.qc ssh_keypair_name = "slurm-app-ci" ssh_private_key_file = "~/.ssh/id_rsa" security_groups = ["default", "SSH"] -ssh_bastion_host = "128.232.222.183" -ssh_bastion_username = "slurm-app-ci" +floating_ip_network = "CUDN-Internet" # Use FIP to avoid docker ratelimits on portal-internal outbound IP diff --git a/environments/.stackhpc/inventory/group_vars/builder.yml b/environments/.stackhpc/inventory/group_vars/builder.yml new file mode 100644 index 000000000..8d7ee98d2 --- /dev/null +++ b/environments/.stackhpc/inventory/group_vars/builder.yml @@ -0,0 +1 @@ +#update_enable: false # Can uncomment for speed debugging non-update related build issues diff --git a/environments/.stackhpc/terraform/main.tf b/environments/.stackhpc/terraform/main.tf index 658e9c170..0ab3be5ee 100644 --- a/environments/.stackhpc/terraform/main.tf +++ b/environments/.stackhpc/terraform/main.tf @@ -13,7 +13,7 @@ variable "cluster_name" { variable "cluster_image" { description = "single image for all cluster nodes - a convenience for CI" type = string - default = "openhpc-240102-1025-e533fd70" # https://github.com/stackhpc/ansible-slurm-appliance/pull/346 + default = "openhpc-240116-1156-aa8dba7d" # https://github.com/stackhpc/ansible-slurm-appliance/pull/351 # default = "Rocky-8-GenericCloud-Base-8.9-20231119.0.x86_64.qcow2" } diff --git a/environments/common/inventory/group_vars/builder/defaults.yml b/environments/common/inventory/group_vars/builder/defaults.yml index 414387ee1..a9fde767c 100644 --- a/environments/common/inventory/group_vars/builder/defaults.yml +++ b/environments/common/inventory/group_vars/builder/defaults.yml @@ -2,6 +2,7 @@ # NOTE: Might be better of as extra vars or in a builder specific inventory as # as dependent on alphabetical ordering of groups, so if these variables are # defined elsewhere the group that is ordered lower will determine the values. +update_enable: true openhpc_slurm_service_started: false nfs_client_mnt_state: present block_devices_partition_state: skip diff --git a/packer/openhpc_extravars.yml b/packer/openhpc_extravars.yml index 96a1b022b..66f668649 100644 --- a/packer/openhpc_extravars.yml +++ b/packer/openhpc_extravars.yml @@ -1 +1 @@ -update_enable: true +workaround_ansible_issue_61497: yes # extravars files can't be empty diff --git a/packer/openstack.pkr.hcl b/packer/openstack.pkr.hcl index 803a42585..cad500e3f 100644 --- a/packer/openstack.pkr.hcl +++ b/packer/openstack.pkr.hcl @@ -92,12 +92,12 @@ variable "image_visibility" { variable "ssh_bastion_host" { type = string - default = "" + default = null } variable "ssh_bastion_username" { type = string - default = "" + default = null } variable "ssh_bastion_private_key_file" {