From c3a9e56c25318e5102019a4142796028c023a690 Mon Sep 17 00:00:00 2001 From: Alex Demidoff Date: Fri, 8 Nov 2024 12:34:48 +0300 Subject: [PATCH] PMM-12641 Optimize PMM build scripts (#3239) * PMM-12641 update ansible configs * PMM-12641 remove redundant playbooks/tasks * PMM-12641 optimize the structure of playbooks, make update.yml redundant * PMM-12641 trigger the build * PMM-12641 fix the playbook name * PMM-12641 pre-pull rpmbuild:3 to the agent * PMM-12641 remove randomwait=1 for faster builds * PMM-12641 put back the init.yml playbook * PMM-12641 remove the unused nginx repository * PMM-12641 fix pid location for nginx * PMM-12641 remove a redundant repository * PMM-12641 fix the startup logic * PMM-12641 simplify version detection * PMM-12641 fix an error * PMM-12641 fix the client user name * PMM-12641 fix nginx being unable to start * PMM-12641 install Goreleaser on the agent * PMM-12641 cleanup RH7 tweaks * PMM-12641 stop Grafana before reinstalling the plugins * PMM-12641 move nginx's pid to /tmp * PMM-12641 stop grafana before provisioning plugins * PMM-12641 remove redundant pkg provisioning * PMM-12641 revert moving nginx's pid * PMM-12641 remove redundant config reread * PMM-12641 do not start grafana initially * PMM-12641 cleanup the comments * PMM-12641 move post-build.yml to the same playbook * PMM-12641 cleanup the docs * PMM-12641 remove space around block: * PMM-12641 remove another redundant playbook * PMM-12641 add an ssh key for a team member * PMM-12641 remove redundant build script * PMM-12641 fix the padding * PMM-12641 don't add ssh keys to the agent * Apply suggestions from code review Co-authored-by: Catalina A <94133018+catalinaadam@users.noreply.github.com> * PMM-12641 Refine the task descriptions * PMM-12641 Refine the task descriptions * Update build/docker/client/Dockerfile Co-authored-by: Nurlan Moldomurov * PMM-12641 Remove the encryption key * PMM-12641 Fix duplicate psycopg2 installation * PMM-12641 move psycopg2 installation to pg playbook --------- Co-authored-by: Catalina A <94133018+catalinaadam@users.noreply.github.com> Co-authored-by: Nurlan Moldomurov --- build/Makefile | 9 +- build/ansible/ansible.cfg | 1 + build/ansible/hosts | 11 + build/ansible/pmm-docker/init.yml | 4 +- build/ansible/pmm-docker/main.yml | 1 + .../post-build.yml} | 28 +- build/ansible/pmm-docker/update.yml | 172 --------- build/ansible/pmm/create-lvm.yml | 65 ---- build/ansible/pmm/files/cloud.cfg | 89 ----- build/ansible/pmm/files/resize-xfs-lvm | 7 - build/ansible/pmm/files/supervisord.service | 14 - build/ansible/pmm/main.yml | 12 - build/ansible/pmm/systemd.yml | 43 --- build/ansible/roles/ami-ovf/tasks/main.yml | 20 - build/ansible/roles/dashboards/tasks/main.yml | 28 +- .../initialization}/files/maintenance.html | 0 .../roles/initialization/tasks/main.yml | 69 +--- build/ansible/roles/nginx/tasks/main.yml | 14 +- build/ansible/roles/pmm-images/tasks/main.yml | 30 +- build/ansible/roles/postgres/tasks/main.yml | 2 +- .../roles/supervisord/files/grafana.ini | 2 +- build/ansible/roles/supervisord/files/pmm.ini | 16 - build/docker/client/Dockerfile | 2 +- build/docker/server/Dockerfile.el9 | 7 +- build/docker/server/create_users.sh | 29 -- build/packages/rpm/client/pmm-client.spec | 2 +- build/packer/ansible/agent-aws.yml | 21 +- build/scripts/build-client-rpm | 8 - build/scripts/build-client-source | 2 +- build/scripts/build-client-srpm | 10 +- build/scripts/build-rpmbuild-docker | 11 - build/scripts/build-server-rpm | 12 +- build/scripts/install_tarball | 2 +- .../pmm-server-config/troubleshooting/logs.md | 2 +- .../01-vm-artifacts-redesign.md | 358 ------------------ .../01-vm-artifacts-redesign/pmm-server.ign | 66 ---- managed/services/server/logs_test.go | 13 +- 37 files changed, 127 insertions(+), 1055 deletions(-) create mode 100644 build/ansible/hosts rename build/ansible/{pmm/post-build-actions.yml => pmm-docker/post-build.yml} (81%) delete mode 100644 build/ansible/pmm-docker/update.yml delete mode 100644 build/ansible/pmm/create-lvm.yml delete mode 100644 build/ansible/pmm/files/cloud.cfg delete mode 100644 build/ansible/pmm/files/resize-xfs-lvm delete mode 100644 build/ansible/pmm/files/supervisord.service delete mode 100644 build/ansible/pmm/main.yml delete mode 100644 build/ansible/pmm/systemd.yml rename build/ansible/{pmm-docker => roles/initialization}/files/maintenance.html (100%) delete mode 100644 build/docker/server/create_users.sh delete mode 100755 build/scripts/build-rpmbuild-docker delete mode 100644 docs/proposals/pmm/01-vm-artifacts-redesign/01-vm-artifacts-redesign.md delete mode 100644 docs/proposals/pmm/01-vm-artifacts-redesign/pmm-server.ign diff --git a/build/Makefile b/build/Makefile index 2a07200b0b..a6c49d91a3 100644 --- a/build/Makefile +++ b/build/Makefile @@ -37,7 +37,8 @@ pmm-ami: build -var 'pmm_server_image_name=${PMM_SERVER_IMAGE}' -only amazon-ebs -color=false packer/pmm.json | tee build.log ## ----------------- PACKER ------------------ -check: ## Run required checks and linters - ansible-playbook --syntax-check ansible/pmm-docker/update.yml - ansible-playbook --check ansible/pmm-docker/update.yml - ansible-lint ansible/pmm-docker/update.yml +check: + echo "TODO: Since update.yml has been deprecated, see if other playbooks need to be linted" + # ansible-playbook --syntax-check ansible/pmm-docker/update.yml + # ansible-playbook --check ansible/pmm-docker/update.yml + # ansible-lint ansible/pmm-docker/update.yml diff --git a/build/ansible/ansible.cfg b/build/ansible/ansible.cfg index 60507f8031..69181e7580 100644 --- a/build/ansible/ansible.cfg +++ b/build/ansible/ansible.cfg @@ -3,6 +3,7 @@ # Otherwise, it will fail with 'Permission denied' error since the default paths are '/root/.ansible/tmp' # Ref: https://github.com/ansible/ansible/blob/stable-2.9/examples/ansible.cfg [defaults] +interpreter_python = /usr/bin/python remote_tmp = /tmp local_tmp = /tmp diff --git a/build/ansible/hosts b/build/ansible/hosts new file mode 100644 index 0000000000..2943c8ea7e --- /dev/null +++ b/build/ansible/hosts @@ -0,0 +1,11 @@ +# This is the default ansible 'hosts' file. +# +# It should live in /etc/ansible/hosts +# +# - Comments begin with the '#' character +# - Blank lines are ignored +# - Groups of hosts are delimited by [header] elements +# - You can enter hostnames or ip addresses +# - A hostname/ip can be a member of multiple groups + +127.0.0.1 ansible_connection=local diff --git a/build/ansible/pmm-docker/init.yml b/build/ansible/pmm-docker/init.yml index 4eba8ceb66..19c1227bc4 100644 --- a/build/ansible/pmm-docker/init.yml +++ b/build/ansible/pmm-docker/init.yml @@ -1,6 +1,6 @@ --- -# This playbook contains tasks executed during initialization PMM Server -- hosts: localhost +# This playbook gets executed by pmm-update-perform-init +- hosts: all become: true become_method: su become_user: pmm diff --git a/build/ansible/pmm-docker/main.yml b/build/ansible/pmm-docker/main.yml index 4165df52cd..207848581c 100644 --- a/build/ansible/pmm-docker/main.yml +++ b/build/ansible/pmm-docker/main.yml @@ -2,5 +2,6 @@ - hosts: all become: yes gather_facts: yes + roles: - pmm-images diff --git a/build/ansible/pmm/post-build-actions.yml b/build/ansible/pmm-docker/post-build.yml similarity index 81% rename from build/ansible/pmm/post-build-actions.yml rename to build/ansible/pmm-docker/post-build.yml index 9f8a6b7b81..96d4945937 100644 --- a/build/ansible/pmm/post-build-actions.yml +++ b/build/ansible/pmm-docker/post-build.yml @@ -27,38 +27,19 @@ --server-address=127.0.0.1:8443 --server-insecure-tls - - name: Reread supervisord configuration - command: supervisorctl reread - become: true - become_user: pmm - become_method: su - register: reread_result - changed_when: "'No config updates to processes' not in reread_result.stdout" - - - name: See which service configs changed - debug: var=reread_result.stdout_lines - - - name: Stop pmm-managed before deleting the database - supervisorctl: - name: pmm-managed - state: stopped - become: true - become_user: pmm - become_method: su - - - name: Remove pmm-managed database + - name: Remove pmm-managed database from PostgreSQL postgresql_db: login_user: postgres name: pmm-managed force: true state: absent - - name: Remove pmm-managed role from postgres + - name: Remove pmm-managed role from PostgreSQL database postgresql_user: name: pmm-managed state: absent - - name: Stop supervisord service for docker + - name: Stop supervisord service command: supervisorctl shutdown become: true become_user: pmm @@ -87,6 +68,7 @@ - /var/log/clickhouse-server - /var/log/nginx - /var/lib/pgsql + - /srv/pmm-encryption.key - name: Remove users created by installers user: @@ -106,7 +88,7 @@ # This is due to the way OverlayFS and kernel works. More info https://github.com/moby/moby/issues/25409 # This step ensures the directory is empty on startup and the contents are recreated. # This is not an issue if the folder is in a volume or mounted from the host OS. - - name: Recreate /srv/victoriametrics folder + - name: Recreate '/srv/victoriametrics' directory file: state: "{{ item }}" path: /srv/victoriametrics diff --git a/build/ansible/pmm-docker/update.yml b/build/ansible/pmm-docker/update.yml deleted file mode 100644 index 019dcc8f98..0000000000 --- a/build/ansible/pmm-docker/update.yml +++ /dev/null @@ -1,172 +0,0 @@ ---- -# This playbook contains tasks executed during PMM Server update. -- hosts: localhost - become: true - remote_user: root - gather_facts: true - - environment: - PATH: /usr/local/bin:{{ ansible_env.PATH }} - - pre_tasks: - - name: Detect /srv/pmm-distribution - stat: - path: /srv/pmm-distribution - no_log: true - register: srv_pmm_distribution - - - name: Detect container environment - set_fact: - is_docker: '{{ lookup("file", "/srv/pmm-distribution") == "docker" }}' - no_log: true - when: srv_pmm_distribution.stat.exists - - - name: Set the variable to true if undefined - set_fact: - is_docker: true - when: is_docker is undefined - - tasks: - - name: Enable maintenance mode - copy: - src: maintenance.html - dest: /usr/share/pmm-server/maintenance/ - owner: pmm - group: pmm - mode: 0644 - - # restart pmm-managed-init and pmm-managed first as they may update supervisord configuration on start - - name: Generate new supervisor config - command: pmm-managed-init - register: managed_init_result - changed_when: True - - - name: Disable pmm-update-perform-init - ini_file: - path: /etc/supervisord.d/pmm.ini - section: program:pmm-update-perform-init - option: autostart - value: "false" - - - name: Check that supervisor socket exists - stat: - path: /run/supervisor/supervisor.sock - register: supervisor_socket - - # During build time, this will be the first start of supervisord. - - name: Start supervisord - when: not supervisor_socket.stat.exists - shell: supervisord -c /etc/supervisord.conf & - - - name: Wait until postgres port is present - wait_for: - host: localhost - port: 5432 - timeout: 150 - - - name: Run initialization playbook - include_role: - name: initialization - - # See https://github.com/Supervisor/supervisor/issues/1264 for explanation - # why we do reread + stop/remove/add instead of using supervisorctl Ansible module. - - name: Reread supervisord configuration - command: supervisorctl reread - become: true - become_user: pmm - become_method: su - register: reread_result - changed_when: "'No config updates to processes' not in reread_result.stdout" - - - name: Check reread results - debug: var=reread_result.stdout_lines - - - name: Restart pmm-managed - command: "supervisorctl {{ item }} pmm-managed" - become: true - become_user: pmm - become_method: su - loop: - - stop - - remove - - add - - # Give pmm-managed time to update supervisord configuration, - # and give update UI time to catch up after pmm-managed restart - - name: Wait for pmm-managed - pause: seconds=10 - - # Fix things that should be fixed before restarts. - - - name: Reread supervisord configuration again - command: supervisorctl reread - register: reread_result - changed_when: "'No config updates to processes' not in reread_result.stdout" - - - name: Check reread results - debug: var=reread_result.stdout_lines - - - name: Restart services - command: supervisorctl {{ item.1 }} {{ item.0 }} - become: true - become_user: pmm - become_method: su - with_nested: - - - nginx - - grafana - - qan-api2 - - pmm-agent - - ["stop", "remove", "add"] - - - name: Check supervisord logs - shell: sleep 10 && tail -n 200 /srv/logs/supervisord.log - - - name: Check grafana logs - shell: cat /srv/logs/grafana.log - - # Regenerating pmm.ini and enabling pmm-update-perform-init - - name: Generate new supervisor config - command: pmm-managed-init - become: true - become_user: pmm - become_method: su - register: managed_init_result - changed_when: True - - - name: Reread pmm-update-perform-init supervisor config - command: supervisorctl reread - register: reread_init__result - changed_when: "'No config updates to processes' not in reread_init__result.stdout" - - - name: Update/restart other services - command: supervisorctl update - register: update_result - changed_when: "'updated' in update_result.stdout" - - - name: Print other services's logs - debug: var=update_result.stdout_lines - - - name: Wait for PMM to be ready - ansible.builtin.uri: - url: "http://127.0.0.1:7772/v1/server/readyz" - status_code: 200 - method: GET - register: healthcheck - until: healthcheck is not failed - retries: 120 - delay: 1 - - # SIGUSR2 is sent to supervisord by pmm-managed right before the update for logging to work correctly. - # We use that fact to show what was restarted during the update. - - name: Get supervisord logs - shell: supervisorctl maintail -100000 | tac | awk '!flag; /received SIGUSR2/{flag = 1};' | tac - register: maintail_result - changed_when: False - - - name: Print supervisord logs - debug: var=maintail_result.stdout_lines - - - name: Disable maintenance mode - file: - state: absent - path: /usr/share/pmm-server/maintenance/maintenance.html diff --git a/build/ansible/pmm/create-lvm.yml b/build/ansible/pmm/create-lvm.yml deleted file mode 100644 index 0126562c0a..0000000000 --- a/build/ansible/pmm/create-lvm.yml +++ /dev/null @@ -1,65 +0,0 @@ -# TODO: This role seems to no longer be used. Verify and remove. -- hosts: localhost - become: true - gather_facts: true - tasks: - - name: Stop supervisord - service: - name: supervisord - state: stopped - - - name: "Copy /srv dir to a safe location" - synchronize: - src: "/srv/" - dest: "/srv-copy" - recursive: yes - - - name: Delete the old /srv directory - file: - state: absent - path: /srv/ - - - name: Create a Volume Group - lvg: - vg: DataVG - pvs: "/dev/nvme1n1" - - - name: Create Thin Pool - register: thin_pool - failed_when: "thin_pool is failed and 'Sorry, no shrinking of DataLV to 0 permitted' not in thin_pool.msg" - lvol: - lv: DataLV - vg: DataVG - size: 100%FREE - opts: --thinpool ThinPool -V 50G - - - name: Format LVM - filesystem: - fstype: xfs - dev: /dev/DataVG/DataLV - opts: -L DATA - - - name: Mount - mount: - name: "/srv" - src: LABEL=DATA - fstype: xfs - opts: defaults,nofail - state: mounted - - - name: Copy files to new partition - synchronize: - src: "/srv-copy/" - dest: "/srv/" - delete: yes - recursive: yes - - - name: Delete the copy of /srv directory - file: - state: absent - path: /srv-copy/ - - - name: Start supervisord - service: - name: supervisord - state: started diff --git a/build/ansible/pmm/files/cloud.cfg b/build/ansible/pmm/files/cloud.cfg deleted file mode 100644 index 97aaf5084c..0000000000 --- a/build/ansible/pmm/files/cloud.cfg +++ /dev/null @@ -1,89 +0,0 @@ -# NOTE: this is a sample cloud.cfg file that is retrieved when building PMM on DigitalOcean. -# It shall be used as a reference only. It is not used by PMM. - -# The top level settings are used as module -# and system configuration. -# A set of users which may be applied and/or used by various modules -# when a 'default' entry is found it will reference the 'default_user' -# from the distro configuration specified below -users: - - default - - -# If this is set, 'root' will not be able to ssh in and they -# will get a message to login instead as the default $user -disable_root: true - -# This will cause the set+update hostname module to not operate (if true) -preserve_hostname: false - -# If you use datasource_list array, keep array items in a single line. -# If you use multi line array, ds-identify script won't read array items. -# Example datasource config -# datasource: -# Ec2: -# metadata_urls: [ 'blah.com' ] -# timeout: 5 # (defaults to 50 seconds) -# max_wait: 10 # (defaults to 120 seconds) - - - - -# The modules that run in the 'init' stage -cloud_init_modules: - - migrator - - seed_random - - bootcmd - - write-files - - growpart - - resizefs - - disk_setup - - mounts - - set_hostname - - update_hostname - - update_etc_hosts - - ca-certs - - rsyslog - - users-groups - - ssh - -# The modules that run in the 'config' stage -cloud_config_modules: - - ssh-import-id - - keyboard - - locale - - set-passwords - - ntp - - timezone - - disable-ec2-metadata - - runcmd - -# The modules that run in the 'final' stage -cloud_final_modules: - - package-update-upgrade-install - - write-files-deferred - - puppet - - chef - - mcollective - - salt-minion - - reset_rmc - - refresh_rmc_and_interface - - rightscale_userdata - - scripts-vendor - - scripts-per-once - - scripts-per-boot - - scripts-per-instance - - scripts-user - - ssh-authkey-fingerprints - - keys-to-console - - install-hotplug - - phone-home - - final-message - - power-state-change - -# System and/or distro specific settings -# (not accessible to handlers/transforms) -system_info: - # This will affect which distro class gets used - # Unknown/fallback distro. - distro: ubuntu \ No newline at end of file diff --git a/build/ansible/pmm/files/resize-xfs-lvm b/build/ansible/pmm/files/resize-xfs-lvm deleted file mode 100644 index b5415db7ae..0000000000 --- a/build/ansible/pmm/files/resize-xfs-lvm +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/sh - -/usr/sbin/pvresize $(/usr/bin/ls /dev/sda /dev/sdb /dev/sdc /dev/xvdb /dev/nvme1n1 2>/dev/null | /usr/bin/grep -v ^$(/usr/sbin/pvdisplay -c | /usr/bin/grep ':VolGroup00:' | /usr/bin/cut -d ':' -f 1 | /usr/bin/tr -d '[:space:]' | /usr/bin/sed 's/[0-9]$//')$ | /usr/bin/grep -v ^$(/usr/bin/findmnt -f -n -o SOURCE / | /usr/bin/sed 's/[0-9]$//')$ | /usr/bin/grep -v ^$(/usr/bin/findmnt -f -n -o SOURCE /mnt/resource | /usr/bin/sed 's/[0-9]$//')$) -/usr/sbin/lvextend -l '1%VG' /dev/DataVG/ThinPool_tmeta -/usr/sbin/lvextend -l '100%VG' /dev/DataVG/ThinPool -/usr/sbin/lvextend -l '80%PVS' /dev/DataVG/DataLV -/usr/sbin/xfs_growfs -d /srv diff --git a/build/ansible/pmm/files/supervisord.service b/build/ansible/pmm/files/supervisord.service deleted file mode 100644 index b62338f97f..0000000000 --- a/build/ansible/pmm/files/supervisord.service +++ /dev/null @@ -1,14 +0,0 @@ -[Unit] -Description=Process Monitoring and Control Daemon -After=rc-local.service nss-user-lookup.target -After=network.target -RequiresMountsFor=/srv - -[Service] -Type=simple -# we need to wait till time is synchronized -ExecStartPre=/usr/bin/sleep 10 -ExecStart=/usr/bin/supervisord -n -c /etc/supervisord.conf - -[Install] -WantedBy=multi-user.target diff --git a/build/ansible/pmm/main.yml b/build/ansible/pmm/main.yml deleted file mode 100644 index ca70705e92..0000000000 --- a/build/ansible/pmm/main.yml +++ /dev/null @@ -1,12 +0,0 @@ ---- -# This playbook is frozen after 2.0.0 GA release. -# All new plays should be added into pmm-update playbook. It is invoked after this one. - -- hosts: all - become: yes - gather_facts: yes - roles: - - cloud-node - - lvm-init - - pmm-images - - ami-ovf diff --git a/build/ansible/pmm/systemd.yml b/build/ansible/pmm/systemd.yml deleted file mode 100644 index 8d16d20a2f..0000000000 --- a/build/ansible/pmm/systemd.yml +++ /dev/null @@ -1,43 +0,0 @@ ---- -# This playbook contains tasks executed during PMM Server update in non-docker environments. -# TODO: refactor from supervisord to systemd if necessary. -# NOTE: it's currently unused, just a placeholder for future use. -- hosts: all - become: true - remote_user: root - gather_facts: true - - # TODO: replace supervisord.service with pmm.service - tasks: - # Note: forking type must be set to 'simple' - - name: Configure supervisord - copy: - src: supervisord.service - dest: /usr/lib/systemd/system/supervisord.service - mode: 0644 - - # Start the services - - name: Enable supervisord service to persist between reboots - systemd: - name: supervisord - enabled: yes - - - name: Start supervisord service for AMI/OVF - systemd: - name: supervisord - state: started # supervisord may already be running - daemon_reload: yes - - - name: Enable crond service - service: - name: crond - state: started - enabled: yes - - # https://jira.percona.com/browse/PMM-9298 - - name: Copy rezise-xfs file for lvm - copy: - src: resize-xfs-lvm - dest: /var/lib/cloud/scripts/per-boot/resize-xfs - mode: 0755 - force: true diff --git a/build/ansible/roles/ami-ovf/tasks/main.yml b/build/ansible/roles/ami-ovf/tasks/main.yml index d9431f4631..eaa26fb2da 100644 --- a/build/ansible/roles/ami-ovf/tasks/main.yml +++ b/build/ansible/roles/ami-ovf/tasks/main.yml @@ -1,24 +1,4 @@ --- -- name: Packages | Remove PMM3 Server testing repository - yum_repository: - name: pmm-server - state: absent - -- name: Packages | Clean up yum metadata - command: yum clean metadata - -- name: Packages | Add PMM3 Server release repository for EL9 - when: - - ansible_distribution == 'OracleLinux' or ansible_distribution == 'AlmaLinux' - - ansible_distribution_major_version == '9' - yum_repository: - name: pmm-server - description: PMM Server YUM repository - x86_64 - baseurl: https://repo.percona.com/pmm3-components/yum/experimental/9/RPMS/x86_64/ - gpgcheck: yes - enabled: yes - gpgkey: file:///etc/pki/rpm-gpg/PERCONA-PACKAGING-KEY - - name: Disable SELinux | EL9 when: - ansible_distribution == 'OracleLinux' or ansible_distribution == 'AlmaLinux' diff --git a/build/ansible/roles/dashboards/tasks/main.yml b/build/ansible/roles/dashboards/tasks/main.yml index e0c51bd75e..cb02cfafc6 100644 --- a/build/ansible/roles/dashboards/tasks/main.yml +++ b/build/ansible/roles/dashboards/tasks/main.yml @@ -1,18 +1,27 @@ --- -- name: Get plugin list +- name: Stop and remove Grafana from Supervisor + shell: "supervisorctl {{ item }} grafana" + become: true + become_user: pmm + become_method: su + loop: + - stop + - remove + +- name: Retrieve Percona plugins find: paths: /usr/share/percona-dashboards/panels/ depth: 2 file_type: directory register: plugin_list -- name: Delete older plugins +- name: Delete outdated plugins file: path: "/srv/grafana/plugins/{{ item['path'].split('/')[-1] }}" state: absent loop: "{{ plugin_list['files'] }}" -- name: Copy plugins to the plugin directory +- name: Copy new plugins to the plugin directory synchronize: src: /usr/share/percona-dashboards/panels/ dest: /srv/grafana/plugins/ @@ -26,12 +35,19 @@ mode: 0775 recurse: yes -- name: Restart grafana with new plugins +- name: Synchronize Percona Dashboards version file after upgrade + copy: + src: /usr/share/percona-dashboards/VERSION + dest: /srv/grafana/PERCONA_DASHBOARDS_VERSION + owner: pmm + group: pmm + mode: 0644 + remote_src: yes + +- name: Restart Grafana service with new plugins shell: "supervisorctl {{ item }} grafana" become: true become_user: pmm become_method: su loop: - - stop - - remove - add diff --git a/build/ansible/pmm-docker/files/maintenance.html b/build/ansible/roles/initialization/files/maintenance.html similarity index 100% rename from build/ansible/pmm-docker/files/maintenance.html rename to build/ansible/roles/initialization/files/maintenance.html diff --git a/build/ansible/roles/initialization/tasks/main.yml b/build/ansible/roles/initialization/tasks/main.yml index 3b091d8048..f66efd862a 100644 --- a/build/ansible/roles/initialization/tasks/main.yml +++ b/build/ansible/roles/initialization/tasks/main.yml @@ -1,49 +1,25 @@ --- # This role contains tasks executed during initialization of PMM Server -- name: detect /srv/pmm-distribution - slurp: - path: /srv/pmm-distribution - register: pmm_distribution - ignore_errors: True -- name: detect AMI +- name: Detect AMI distribution set_fact: - is_ami: "{{ pmm_distribution['content'] | b64decode | trim == 'ami' }}" - when: pmm_distribution['failed'] == false + pmm_distribution: "{{ lookup('file', '/srv/pmm-distribution', errors='ignore') | default('none', true) }}" -- name: Set PMM distribution +- name: Set is_ami fact set_fact: - is_ami: "False" - when: pmm_distribution['failed'] == true + is_ami: "{{ pmm_distribution | trim == 'ami' }}" -- name: Get current version - slurp: - src: /srv/grafana/PERCONA_DASHBOARDS_VERSION - register: current_version_file - ignore_errors: True - -- name: Get image version - slurp: - src: /usr/share/percona-dashboards/VERSION - register: image_version_file - -- name: Set current version if VERSION doesn't exist +- name: Get current PMM version (aka 'update from') set_fact: - pmm_current_version: "0.0" - when: current_version_file['failed'] == true + pmm_current_version: "{{ lookup('file', '/srv/grafana/PERCONA_DASHBOARDS_VERSION', errors='ignore') | default('0', true) }}" -- name: Setting current PMM version +- name: Get PMM image version for update set_fact: - pmm_current_version: "{{ current_version_file['content'] | b64decode | trim }}" - when: current_version_file['failed'] != true - -- name: Setting current PMM image version - set_fact: - pmm_image_version: "{{ image_version_file['content'] | b64decode | trim }}" + pmm_image_version: "{{ lookup('file', '/usr/share/percona-dashboards/VERSION', errors='strict') }}" - name: Set need_initialization fact set_fact: - need_initialization: "{{ current_version_file['failed'] == true }}" + need_initialization: "{{ pmm_current_version == '0' }}" - name: Set need_upgrade fact set_fact: @@ -53,9 +29,9 @@ debug: msg: "PMM distribution: {{ pmm_distribution }}, Is AMI: {{ is_ami }}" -- name: Print current PMM and image versions +- name: Show PMM current and target image versions debug: - msg: "Current version: {{ pmm_current_version }} Image Version: {{ pmm_image_version }}" + msg: "Current version: {{ pmm_current_version }}, Image Version: {{ pmm_image_version }}" - name: Print need_initialization fact debug: @@ -65,8 +41,8 @@ debug: msg: "Need upgrade: {{ need_upgrade }}" -- name: Perform upgrade & init tasks - block: +- name: Execute PMM upgrade & initialization tasks + block: # when need_initialization or need_upgrade - name: Enable maintenance mode before upgrade copy: src: maintenance.html @@ -75,7 +51,13 @@ group: pmm mode: 0644 - - name: Create grafana DB + - name: Wait for PostgreSQL to become responsive + wait_for: + host: 127.0.0.1 + port: 5432 + timeout: 150 + + - name: Create Grafana DB block: - name: Create grafana database in postgres postgresql_db: @@ -99,15 +81,6 @@ include_role: name: dashboards - - name: Copy file with image version - copy: - src: /usr/share/percona-dashboards/VERSION - dest: /srv/grafana/PERCONA_DASHBOARDS_VERSION - owner: pmm - group: pmm - mode: 0644 - remote_src: yes - - name: Create a backup directory file: path: /srv/backup @@ -130,7 +103,7 @@ delay: 5 ignore_errors: true - - name: init admin password on AMI + - name: Initialize admin password for AMI if needed include_role: name: init-admin-password-ami when: need_initialization and is_ami diff --git a/build/ansible/roles/nginx/tasks/main.yml b/build/ansible/roles/nginx/tasks/main.yml index d57a142e73..18d5634e51 100644 --- a/build/ansible/roles/nginx/tasks/main.yml +++ b/build/ansible/roles/nginx/tasks/main.yml @@ -1,14 +1,5 @@ --- -# We already have nginx package in epel repo -- name: Add Nginx repository - yum_repository: - name: nginx - description: nginx repo - baseurl: http://nginx.org/packages/rhel/9/$basearch/ - gpgcheck: no - enabled: no - -- name: Create directories for nginx +- name: Create required directories for Nginx file: path: "{{ item }}" state: directory @@ -79,10 +70,11 @@ recurse: yes loop: - /var/lib/nginx + - /var/log/nginx - /etc/nginx - /srv/nginx -- name: Change ownership of nginx files +- name: Provision empty Nginx configuration files file: path: "{{ item }}" state: touch diff --git a/build/ansible/roles/pmm-images/tasks/main.yml b/build/ansible/roles/pmm-images/tasks/main.yml index 9c3942eda2..615e468d68 100644 --- a/build/ansible/roles/pmm-images/tasks/main.yml +++ b/build/ansible/roles/pmm-images/tasks/main.yml @@ -29,9 +29,8 @@ yum: name: - python3-pip - - python3.11-pip - - python3.11-psycopg2 - rsync + state: latest - name: Create groups group: @@ -144,3 +143,30 @@ state: touch owner: pmm group: pmm + +# Launch pmm-managed-init first as it may update supervisord configuration on start +- name: Generate new supervisor config + command: pmm-managed-init + register: managed_init_result + changed_when: True + +- name: Disable pmm-update-perform-init + ini_file: + path: /etc/supervisord.d/pmm.ini + section: program:pmm-update-perform-init + option: autostart + value: "false" + +# During build time, this will be the first start of supervisord. +- name: Start supervisord + shell: supervisord -c /etc/supervisord.conf & + +- name: Run initialization playbook + include_role: + name: initialization + +- name: Check supervisord logs + shell: sleep 10 && tail -n 200 /srv/logs/supervisord.log + +- name: Check grafana logs + shell: cat /srv/logs/grafana.log diff --git a/build/ansible/roles/postgres/tasks/main.yml b/build/ansible/roles/postgres/tasks/main.yml index dc21fa5563..d308d17a18 100644 --- a/build/ansible/roles/postgres/tasks/main.yml +++ b/build/ansible/roles/postgres/tasks/main.yml @@ -16,7 +16,7 @@ - percona-postgresql14-server - percona-postgresql14-contrib - percona-postgresql14 - - python-psycopg2 # Python PostgreSQL database adapter + - python3-psycopg2 # Python PostgreSQL database adapter state: installed - name: Create a socket directory for Postgres diff --git a/build/ansible/roles/supervisord/files/grafana.ini b/build/ansible/roles/supervisord/files/grafana.ini index 8bc767213d..09d238dc61 100644 --- a/build/ansible/roles/supervisord/files/grafana.ini +++ b/build/ansible/roles/supervisord/files/grafana.ini @@ -10,7 +10,7 @@ command = user = pmm directory = /usr/share/grafana autorestart = true -autostart = true +autostart = false startretries = 10 startsecs = 1 stopsignal = TERM diff --git a/build/ansible/roles/supervisord/files/pmm.ini b/build/ansible/roles/supervisord/files/pmm.ini index dec1638e1a..9facb3cbb9 100644 --- a/build/ansible/roles/supervisord/files/pmm.ini +++ b/build/ansible/roles/supervisord/files/pmm.ini @@ -117,19 +117,3 @@ stdout_logfile = /srv/logs/pmm-agent.log stdout_logfile_maxbytes = 50MB stdout_logfile_backups = 2 redirect_stderr = true - -[program:pmm-update-perform] -command = /usr/sbin/pmm-update -perform -playbook=/opt/ansible/pmm-docker/update.yml -user = pmm -directory = / -autorestart = unexpected -exitcodes = 0 -autostart = false -startretries = 10 -startsecs = 1 -stopsignal = TERM -stopwaitsecs = 300 -stdout_logfile = /srv/logs/pmm-update-perform.log -stdout_logfile_maxbytes = 50MB -stdout_logfile_backups = 3 -redirect_stderr = true diff --git a/build/docker/client/Dockerfile b/build/docker/client/Dockerfile index a5d82c7165..eb7134729d 100644 --- a/build/docker/client/Dockerfile +++ b/build/docker/client/Dockerfile @@ -5,7 +5,7 @@ RUN microdnf install shadow-utils jq tar -y RUN groupadd -g 1002 pmm-agent && \ useradd -u 1002 -r -g pmm-agent -s /sbin/nologin \ -d /usr/local/percona/pmm \ - -c "PMM 2.X Client User" pmm-agent + -c "PMM Client User" pmm-agent FROM redhat/ubi9-micro diff --git a/build/docker/server/Dockerfile.el9 b/build/docker/server/Dockerfile.el9 index d5feb02671..cf40e4679f 100644 --- a/build/docker/server/Dockerfile.el9 +++ b/build/docker/server/Dockerfile.el9 @@ -4,7 +4,6 @@ ARG VERSION ARG BUILD_DATE ENV LANG=en_US.utf8 -ENV LC_ALL=en_US.utf8 ENV GF_PLUGIN_DIR=/srv/grafana/plugins ENV PS1="[\u@\h \W] # " @@ -28,9 +27,9 @@ COPY gitCommit /tmp/gitCommit COPY pmm-client.tar.gz /tmp/ RUN install -T -p -m 644 /opt/ansible/ansible.cfg /etc/ansible/ansible.cfg && \ - ansible-playbook -vvv -i 'localhost,' -c local /opt/ansible/pmm-docker/main.yml && \ - ansible-playbook -vvv -i 'localhost,' -c local /opt/ansible/pmm-docker/update.yml && \ - ansible-playbook -vvv -i 'localhost,' -c local /opt/ansible/pmm/post-build-actions.yml && \ + install -T -p -m 644 /opt/ansible/hosts /etc/ansible/hosts && \ + ansible-playbook -vvv /opt/ansible/pmm-docker/main.yml && \ + ansible-playbook -vvv /opt/ansible/pmm-docker/post-build.yml && \ sed -i '/^assumeyes/d' /etc/dnf/dnf.conf LABEL org.opencontainers.image.created ${BUILD_DATE} diff --git a/build/docker/server/create_users.sh b/build/docker/server/create_users.sh deleted file mode 100644 index e37eae3bf8..0000000000 --- a/build/docker/server/create_users.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/bin/bash - -users=( - "pmm:1000:/bin/false:/home/pmm:pmm" - "nginx:1001:/sbin/nologin:/var/cache/nginx:nginx" -) - -for user in "${users[@]}"; do - IFS=: read -r name uid shell home_dir group <<< "$user" - group_id="$uid" - - # Check if user already exists - if id "$name" >/dev/null 2>&1; then - echo "User $name already exists" - continue - fi - - # Create user with home directory if it doesn't exist - if [ ! -d "$home_dir" ]; then - mkdir -p "$home_dir" - fi - - # Create user with specified UID, GID, and shell - groupadd -o -g "$group_id" "$group" - useradd -o -u "$uid" -g "$group" -G "$group" -s "$shell" -d "$home_dir" -c "$name" -m "$name" - chown "$uid:$group_id" "$home_dir" - -done - diff --git a/build/packages/rpm/client/pmm-client.spec b/build/packages/rpm/client/pmm-client.spec index 98e78c8eb7..44d87c0f04 100644 --- a/build/packages/rpm/client/pmm-client.spec +++ b/build/packages/rpm/client/pmm-client.spec @@ -22,7 +22,7 @@ Requires(postun): systemd AutoReq: no Conflicts: pmm-client -Obsoletes: pmm2-client < 3.0.0 +Obsoletes: pmm2-client < 3.0.0 %description Percona Monitoring and Management (PMM) is an open-source platform for managing and monitoring MySQL and MongoDB diff --git a/build/packer/ansible/agent-aws.yml b/build/packer/ansible/agent-aws.yml index 910e93cef9..7bf8539408 100644 --- a/build/packer/ansible/agent-aws.yml +++ b/build/packer/ansible/agent-aws.yml @@ -7,6 +7,7 @@ kubectl_version: "1.29.2" doctl_version: "1.105.0" node_version: "18.x" + goreleaser_version: "2.3.2" # https://github.com/goreleaser/goreleaser/releases tasks: - name: Detect vm architecture @@ -41,6 +42,8 @@ - docker-ce - docker-ce-cli - containerd.io + - docker-buildx-plugin + - docker-compose-plugin state: latest - name: Enable Docker service @@ -67,8 +70,9 @@ - gcc - make - perl - #- p7zip + - bc - nodejs + - chromium state: latest - name: Install Java17 @@ -94,6 +98,7 @@ - name: Install bats shell: "npm install -g bats" + # TODO: remove once we refactor percona/pmm-qa and percona-lab/qa-integration - name: Install docker-compose get_url: url: "https://github.com/docker/compose/releases/download/v{{ docker_compose_version }}/docker-compose-linux-{{ ansible_architecture }}" @@ -112,11 +117,6 @@ dest: /usr/local/bin/lw-scanner mode: "u+x,g+x,o+x" - - name: Install chromium - yum: - name: chromium - state: latest - - name: Install doctl client for digital ocean get_url: url: https://github.com/digitalocean/doctl/releases/download/v{{ doctl_version }}/doctl-{{ doctl_version }}-linux-{{ ansible_architecture_alt }}.tar.gz @@ -129,7 +129,12 @@ remote_src: yes mode: "u+x,g+x,o+x" - - name: Instal yq + - name: Install Goreleaser + yum: + name: https://github.com/goreleaser/goreleaser/releases/download/v{{ goreleaser_version }}/goreleaser-{{ goreleaser_version }}-1.{{ ansible_architecture }}.rpm + state: present + + - name: install latest YQ (YAML processor) get_url: url: https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 dest: /usr/bin/yq @@ -198,6 +203,8 @@ - ubuntu:noble - rockylinux:8 - oraclelinux:9 + - public.ecr.aws/e7j3v3n0/rpmbuild:3 + - public.ecr.aws/e7j3v3n0/rpmbuild:2 - name: Turn off swap sysctl: diff --git a/build/scripts/build-client-rpm b/build/scripts/build-client-rpm index 938ba39ccb..7c178670d0 100755 --- a/build/scripts/build-client-rpm +++ b/build/scripts/build-client-rpm @@ -14,14 +14,6 @@ main() { set -o errexit set -o xtrace - if grep -q 'CentOS Linux 7' /etc/os-release; then - # disable fastestmirror plugin, which mostly fails due to CentOS 7 being EOL - sed -i 's/enabled=1/enabled=0/g' /etc/yum/pluginconf.d/fastestmirror.conf - - sed -i -e 's/^\(mirrorlist\)/#\1/g' /etc/yum.repos.d/CentOS-Base.repo - sed -i -e 's|^#baseurl.*|baseurl=http://vault.centos.org/centos/\$releasever/os/\$basearch/|g' /etc/yum.repos.d/CentOS-Base.repo - fi - export pmm_version=$pmm_version export pmm_release=$pmm_release export sudo_path=\$(ls /usr/bin/sudo) diff --git a/build/scripts/build-client-source b/build/scripts/build-client-source index df923cbee8..11d2a6211e 100755 --- a/build/scripts/build-client-source +++ b/build/scripts/build-client-source @@ -53,7 +53,7 @@ prepare_vmagent_tarball() { if [ -f "${tarball}" ]; then echo ${tarball} already exists, skipping download else - wget https://github.com/VictoriaMetrics/VictoriaMetrics/archive/${commit_hash}.tar.gz -O ${tarball} + curl -fsSL -o ${tarball} https://github.com/VictoriaMetrics/VictoriaMetrics/archive/${commit_hash}.tar.gz fi } diff --git a/build/scripts/build-client-srpm b/build/scripts/build-client-srpm index 2664613c66..c0e276f353 100755 --- a/build/scripts/build-client-srpm +++ b/build/scripts/build-client-srpm @@ -14,20 +14,12 @@ main() { set -o errexit set -o xtrace - if grep -q 'CentOS Linux 7' /etc/os-release; then - # disable fastestmirror plugin, which mostly fails due to CentOS 7 being EOL - sed -i 's/enabled=1/enabled=0/g' /etc/yum/pluginconf.d/fastestmirror.conf - - sed -i -e 's/^\(mirrorlist\)/#\1/g' /etc/yum.repos.d/CentOS-Base.repo - sed -i -e 's|^#baseurl.*|baseurl=http://vault.centos.org/centos/\$releasever/os/\$basearch/|g' /etc/yum.repos.d/CentOS-Base.repo - fi - export pmm_release=$pmm_release export sudo_path= if [ -e /usr/bin/sudo ]; then export sudo_path=\$(ls /usr/bin/sudo) fi - [[ ${IMAGE} = ${rpmbuild_docker_image} ]] || \$sudo_path yum -y install git rpm-build + mkdir -p /tmp/pmm pushd /home/builder/results /home/builder/bin/build-client-packages \ diff --git a/build/scripts/build-rpmbuild-docker b/build/scripts/build-rpmbuild-docker deleted file mode 100755 index 77706f3621..0000000000 --- a/build/scripts/build-rpmbuild-docker +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/bash - -set -o errexit -set -o xtrace - -. $(dirname $0)/vars - -root_dir=$(cd $(dirname $0)/../..; pwd -P) -docker build --squash -f ${docker_file} -t rpmbuild:local ${root_dir}/build/docker/rpmbuild/ - -# vim: expandtab shiftwidth=4 tabstop=4 diff --git a/build/scripts/build-server-rpm b/build/scripts/build-server-rpm index 8fdaa28d32..bd493a3d8a 100755 --- a/build/scripts/build-server-rpm +++ b/build/scripts/build-server-rpm @@ -26,9 +26,7 @@ is_build_needed() { local packages= # Structure of S3 build cache - # s3://pmm-build-cache/PR-BUILDS/7 - el7 # s3://pmm-build-cache/PR-BUILDS/9 - el9 - # s3://pmm-build-cache/RELEASE/7 - el7 # s3://pmm-build-cache/RELEASE/9 - el9 # RPM_EPOCH is set for Feature Builds @@ -109,14 +107,6 @@ build() { echo \"repo_name: ${repo_name}\" echo \"rpm_verison: ${rpm_version}\" - if grep -q 'CentOS Linux 7' /etc/os-release; then - # disable fastestmirror plugin, which mostly fails due to CentOS 7 being EOL - sudo sed -i 's/enabled=1/enabled=0/g' /etc/yum/pluginconf.d/fastestmirror.conf - - sudo sed -i -e 's/^\(mirrorlist\)/#\1/g' /etc/yum.repos.d/CentOS-Base.repo - sudo sed -i -e 's|^#baseurl.*|baseurl=http://vault.centos.org/centos/\$releasever/os/\$basearch/|g' /etc/yum.repos.d/CentOS-Base.repo - fi - sudo chown -R builder:builder /home/builder/rpm/RPMS /home/builder/rpm/SOURCES # Add 'Epoch' to spec file to prevent update of rpms which are built in PR build @@ -134,7 +124,7 @@ build() { sleep 1 done - sudo yum-builddep --randomwait=1 -y SOURCES/${spec_name}.spec + sudo yum-builddep -y SOURCES/${spec_name}.spec spectool -C SOURCES -g SOURCES/${spec_name}.spec rpmbuild --define '_rpmdir %{_topdir}/RPMS/${spec_name}-${rpm_version}' \ diff --git a/build/scripts/install_tarball b/build/scripts/install_tarball index 4ca4ce6ba8..954e368fa9 100755 --- a/build/scripts/install_tarball +++ b/build/scripts/install_tarball @@ -14,7 +14,7 @@ Example: To install PMM tarball - $0 To update the installed PMM tarball - $0 -u EOF - exit 1 + exit 1 } UPDATE=0 diff --git a/docs/api/pmm-server-config/troubleshooting/logs.md b/docs/api/pmm-server-config/troubleshooting/logs.md index 4672d28d6a..d74c075a07 100644 --- a/docs/api/pmm-server-config/troubleshooting/logs.md +++ b/docs/api/pmm-server-config/troubleshooting/logs.md @@ -28,7 +28,7 @@ The downloaded logs package contains the following structure: { "codes": [ { - "code": "# tree\n├── clickhouse-server.err.log\n├── clickhouse-server.log\n├── clickhouse-server.startup.log\n├── client\n│ ├── list.txt\n│ ├── pmm-admin-version.txt\n│ ├── pmm-agent-config.yaml\n│ ├── pmm-agent-version.txt\n│ └── status.json\n├── cron.log\n├── dashboard-upgrade.log\n├── grafana.log\n├── installed.json\n├── nginx.conf\n├── nginx.log\n├── nginx.startup.log\n├── pmm-agent.log\n├── pmm-agent.yaml\n├── pmm-managed.log\n├── pmm-ssl.conf\n├── pmm-update-perform-init.log\n├── pmm-update-perform.log\n├── pmm-version.txt\n├── pmm.conf\n├── pmm.ini\n├── postgresql.log\n├── postgresql.startup.log\n├── prometheus.base.yml\n├── prometheus.log\n├── qan-api2.ini\n├── qan-api2.log\n├── supervisorctl_status.log\n├── supervisord.conf\n├── supervisord.log\n├── systemctl_status.log\n├── victoriametrics-promscrape.yml\n├── victoriametrics.ini\n├── victoriametrics.log\n├── victoriametrics_targets.json\n├── vmalert.ini\n└── vmalert.log", + "code": "# tree\n├── clickhouse-server.err.log\n├── clickhouse-server.log\n├── clickhouse-server.startup.log\n├── client\n│ ├── list.txt\n│ ├── pmm-admin-version.txt\n│ ├── pmm-agent-config.yaml\n│ ├── pmm-agent-version.txt\n│ └── status.json\n├── cron.log\n├── dashboard-upgrade.log\n├── grafana.log\n├── installed.json\n├── nginx.conf\n├── nginx.log\n├── nginx.startup.log\n├── pmm-agent.log\n├── pmm-agent.yaml\n├── pmm-managed.log\n├── pmm-ssl.conf\n├── pmm-update-perform-init.log\n├── pmm-version.txt\n├── pmm.conf\n├── pmm.ini\n├── postgresql.log\n├── postgresql.startup.log\n├── prometheus.base.yml\n├── prometheus.log\n├── qan-api2.ini\n├── qan-api2.log\n├── supervisorctl_status.log\n├── supervisord.conf\n├── supervisord.log\n├── systemctl_status.log\n├── victoriametrics-promscrape.yml\n├── victoriametrics.ini\n├── victoriametrics.log\n├── victoriametrics_targets.json\n├── vmalert.ini\n└── vmalert.log", "language": "text" } ] diff --git a/docs/proposals/pmm/01-vm-artifacts-redesign/01-vm-artifacts-redesign.md b/docs/proposals/pmm/01-vm-artifacts-redesign/01-vm-artifacts-redesign.md deleted file mode 100644 index e69fb71192..0000000000 --- a/docs/proposals/pmm/01-vm-artifacts-redesign/01-vm-artifacts-redesign.md +++ /dev/null @@ -1,358 +0,0 @@ -# VM artifacts for PMM - -## Summary - -Containers are a lightweight solution to run on any platform. One such platform is Virtual Machine. There is no additional benefit to having a custom Virtual Machine image for PMM. - -Modern Operation Systems adopted a new pattern of minimal VMs that are designed to run containers and are Cloud Native: - -- Fedora CoreOS (FCOS) -- openSUSE MicroOS -- bottlerocket-os -- etc - -Those OSes provide additional capabilities compared to the custom image: - -- transactional updates -- auto-update -- init/bootstrap container or application in the image - -Instead of a custom VM image, we recommend using a more advanced base VM of your choice and running PMM as a container inside. There is additional "How To" documentation with examples and Migration documentation. - -## Motivation - -[Currently](https://github.com/percona/pmm/blob/pmm-3.35.0/build/packer/pmm.json), we build several VM artifacts with CentOS 7 as a base. There is also work to migrate that base to EL 9 base. -Migration to EL9 will not solve the problem but will further postpone it to a later time. - -VM was designed like a container - base image (CentOS 7/EL9) + ansible roles/playbooks/tasks to provision PMM inside the image. After provisioning, there is another custom image with PMM and all the needed software that could be used to bootstrap. - -As it is an additional artifact, it needs the following: - -- maintenance -- support -- testing - -There is an initiative and PoC that does half of the job to move from the custom VM: - -- https://jira.percona.com/browse/PMM-8306 -- https://github.com/percona/pmm-server/pull/343/files - -It shows the possibility of running PMM in a container and gets us closer to the end Goal. - -### Goals - -Deprecate custom VM to: - -- reduce maintenance: ansible roles, packer, pipelines -- reduce support: support one artifact instead of two -- reduce testing: update/upgrade, image validations, additional tests -- reduce cloud resources: building images, storing images -- increase speed: release testing cycle, ansible development, and maintenance - -Educate users on best practices to run PMM as a container in mature and modern VM environments. - -## Proposal - -Deprecate PMM VM artifact as part of the product. - -Current users could migrate to the container that could be run on various platforms (ECS, k8s, VMs, bare-metal). - -Provide "How To" run PMM in VM documentation and migration from custom images to upstream technologies documentation. - -## Design Details - -Container-oriented, Cloud native VM images support init/bootstrapping technologies, such as: - -- ignition -- cloud-init -- cfn-init -- etc - -Documentation should include: -- list of available VM image options and links to the upstream documentation. -- recommend using FCOS with Ignition and provide examples of how to run it for VirtualBox, AWS, GCP, and DigitalOcean: -https://docs.fedoraproject.org/en-US/fedora-coreos/stream-metadata/ - -Ignition config could be: - -- provided in the documentation -- sourced from the Percona-owned remote endpoint (URL), for example, Portal - -### Risks and Mitigations - -#### Value - -There will be only one artifact - containers. Remove confusion and extend the choice of the base image. - -The separate base image brings: - -- security -- capabilities - - auto-update - - transactions -- variety - -Separation of concerns, base image versus functionality, enables more capabilities for both. Image updates could happen more often and be more robust. For example, transactional updates allow automatic rollbacks if [health check](https://github.com/openSUSE/health-checker) wouldn't pass. - -Another value is reducing the cost of PMM products. - -#### Usability - -Removing additional artifacts reduces the users' scope and transfers base OS image maintenance to them. - -**Mitigation** - -Upstream documentation for the base VM images is rolling much faster and up to date. - -Produce additional documentation for usage and migration. - -`pmm-cli` could be adopted to support VM bootstrap for chosen platforms for better UX. - -#### Feasibility - -PoCs are done and show the feasibility of using such technology: - -- https://jira.percona.com/browse/PMM-8306 -- [Butane config](#poc) - -#### Business viability - -Telemetry shows that PMM on VM adoption is not that big, but the cost reduction could be much more significant with deprecating custom VM images. - -As security first company, we need to separate concerns and not carry additional (OS) layers that don't bring new value. - -**Mitigation** - -For essential users, we could handhold them to the new approach. - -### PoC - -I have validated that approach works running it locally as well as in the Cloud. - -Content of the `pmm-server-butane.yaml`: - -```yaml -variant: fcos -version: 1.4.0 -passwd: - users: - - name: core - ssh_authorized_keys: - - ssh-rsa AAAA... -systemd: - units: - - name: serial-getty@ttyS0.service - dropins: - - name: autologin-core.conf - contents: | - [Service] - # Override Execstart in main unit - ExecStart= - # Add new Execstart with `-` prefix to ignore failure - ExecStart=-/usr/sbin/agetty --autologin core --noclear %I $TERM - TTYVTDisallocate=no - - name: failure.service - enabled: true - contents: | - [Service] - Type=oneshot - ExecStart=/usr/bin/false - RemainAfterExit=yes - - [Install] - WantedBy=multi-user.target - - name: pmm-server.service - enabled: true - contents: | - [Unit] - Description=pmm-server - Wants=network-online.target - After=network-online.target - - [Service] - Type=simple - - # set environment for this unit - Environment=PMM_VOLUME_PATH=/var/lib/pmm-data/ - Environment=PMM_TAG=2.35.0 - Environment=PMM_IMAGE=docker.io/percona/pmm-server - - # optional env file that could override previous env settings for this unit - EnvironmentFile=-/var/lib/pmm-data/env - - ExecStart=/usr/bin/podman run --rm --replace=true --name=%N \ - --network=host --ulimit=host \ - --mount=type=bind,src=${PMM_VOLUME_PATH},dst=/srv,relabel=shared \ - --health-cmd=none --health-interval=disable \ - ${PMM_IMAGE}:${PMM_TAG} - ExecStop=/usr/bin/podman stop -t 10 %N - Restart=always - RestartSec=20 - - [Install] - Alias=%N - WantedBy=multi-user.target -storage: - disks: - - # pmm-data volume - device: /dev/disk/by-diskseq/2 - # We do not want to wipe the partition table since this is a persistent storage - wipe_table: false - partitions: - - number: 1 - label: pmm-data - # as large as possible - size_mib: 0 - resize: true - filesystems: - - path: /var/lib/pmm-data - device: /dev/disk/by-partlabel/pmm-data - format: xfs - # Ask Butane to generate a mount unit for us so that this filesystem - # gets mounted in the real root. - with_mount_unit: true - -``` - -Convert Butane to Ignition: -```sh -podman run --interactive --rm --security-opt label=disable \ - --volume ${PWD}:/pwd --workdir /pwd quay.io/coreos/butane:release \ - --pretty --strict pmm-server-butane.yaml > pmm-server.ign -``` - -Resulting [Ignition config](pmm-server.ign). - -#### Public Cloud - -GCP: - -```sh -STREAM="stable" -IGNITION_CONFIG="/absolute/path/pmm-server.ign" -VM_NAME="den-test-pmm-ignition" -DISK_NAME="den-test-pmm-data" -ZONE="us-central1-a" - -gcloud compute instances create --zone=${ZONE} --tags https-server \ ---metadata-from-file "user-data=${IGNITION_CONFIG}" \ ---image-project "fedora-coreos-cloud" --image-family "fedora-coreos-${STREAM}" \ ---create-disk "name=${DISK_NAME},size=20GB,device-name=pmm-server-data,auto-delete=no" \ -"${VM_NAME}" -``` -GCP will return External `IP` as outcome the command. `ssh` to that `IP`, go to `https://IP`. - -#### Libvirt - -**Install** - -Local demo on kvm: https://docs.fedoraproject.org/en-US/fedora-coreos/provisioning-qemu/. - -I will spin up local VM and check that it works. - -PMM data volume: - -```sh -qemu-img create /home/dkondratenko/.local/share/libvirt/images/sdb.qcow2 20G -``` - -Start VM: - -```sh -STREAM="stable" -IGNITION_CONFIG="/absolute/path/pmm-server.ign" -IMAGE="/home/user/.local/share/libvirt/images/fedora-coreos-37.20230205.3.0-qemu.x86_64.qcow2" -PMM_DATA="/home/user/.local/share/libvirt/images/sdb.qcow2" -VM_NAME="pmm-test-01" -VCPUS="2" -RAM_MB="2048" -DISK_GB="10" -# For x86 / aarch64, -IGNITION_DEVICE_ARG=(--qemu-commandline="-fw_cfg name=opt/com.coreos/config,file=${IGNITION_CONFIG}") - -# Setup the correct SELinux label to allow access to the config -chcon --verbose --type svirt_home_t ${IGNITION_CONFIG} - -virt-install --connect="qemu:///system" --name="${VM_NAME}" --vcpus="${VCPUS}" --memory="${RAM_MB}" \ - --os-variant="fedora-coreos-$STREAM" --import --graphics=none \ - --disk="size=${DISK_GB},backing_store=${IMAGE},boot.order=1" \ - --disk="serial=pmm-server-data,path=${PMM_DATA},boot.order=2" \ - --network bridge=virbr0 "${IGNITION_DEVICE_ARG[@]}" -``` - -Check `IP` address on a VM and go to the PMM UI in the browser using it. - -**Update** - -Let me demonstrate image replacement. - -Stop old VM and detach persistent storage: - -```sh -virsh --connect qemu:///system shutdown pmm-test-02 -virsh --connect qemu:///system detach-disk pmm-test-02 --persistent vdb -``` - -Spin new VM with new image and old storage: - -```sh -STREAM="testing" -IGNITION_CONFIG="/absolute/path/pmm-server.ign" -IMAGE="/home/user/.local/share/libvirt/images/fedora-coreos-37.20230218.2.0-qemu.x86_64.qcow2" -PMM_DATA="/home/user/.local/share/libvirt/images/sdb.qcow2" -VM_NAME="pmm-test-02" -VCPUS="2" -RAM_MB="2048" -DISK_GB="10" -# For x86 / aarch64, -IGNITION_DEVICE_ARG=(--qemu-commandline="-fw_cfg name=opt/com.coreos/config,file=${IGNITION_CONFIG}") - -# Setup the correct SELinux label to allow access to the config -chcon --verbose --type svirt_home_t ${IGNITION_CONFIG} - -virt-install --connect="qemu:///system" --name="${VM_NAME}" --vcpus="${VCPUS}" --memory="${RAM_MB}" \ - --os-variant="fedora-coreos-$STREAM" --import --graphics=none \ - --disk="size=${DISK_GB},backing_store=${IMAGE},boot.order=1" \ - --disk="serial=pmm-server-data,path=${PMM_DATA},boot.order=2" \ - --network bridge=virbr0 "${IGNITION_DEVICE_ARG[@]}" -``` - -Check `IP` address on a VM and got to the PMM UI in the browser using it. Validate that old and new data is present. - -#### Going beyond the PoC - -Podman was used just for the demo and could also be docker. - -There is no need to bind volume. Persistent volume should be mounted to the correct path so docker state and volumes would be saved there automatically. - -Ignition has a lot of features, so there could be: - -- additional service to re-size volume if it is expanded -- configuration files that change the behavior of the container (envs) -- auto-rollback -- etc - -## Drawbacks - -### AWS Marketplace - -Custom PMM image could be easily uploaded to the AWS Marketplace. - -But updates happen less often than it is required. For example, CVE in a base image would need to wait for the next PMM release. - -**Mitigation** - -AWS Marketplace supports [CloudFormation](https://docs.aws.amazon.com/marketplace/latest/userguide/cloudformation.html), with a similar init mechanism (`cfn-init`). So there could be an AWS Marketplace presence if we change the instrument from a custom image to the CloudFormation Template. - -Deprecating AWS Marketplace presence or offloading it to the partners is another way to mitigate this problem. - -### Migration - -It should be reasonably straightforward, as PMM data should be stored in a separate volume. There could be some issues with different users/groups, which could be documented or automated with Ignition. - -Documentation about migration from the custom PMM image to the cloud-native VM with a container should be developed. - -## Alternatives - -TBD - diff --git a/docs/proposals/pmm/01-vm-artifacts-redesign/pmm-server.ign b/docs/proposals/pmm/01-vm-artifacts-redesign/pmm-server.ign deleted file mode 100644 index a4d9476c24..0000000000 --- a/docs/proposals/pmm/01-vm-artifacts-redesign/pmm-server.ign +++ /dev/null @@ -1,66 +0,0 @@ -{ - "ignition": { - "version": "3.3.0" - }, - "passwd": { - "users": [ - { - "name": "core", - "sshAuthorizedKeys": [ - "ssh-rsa AAAA" - ] - } - ] - }, - "storage": { - "disks": [ - { - "device": "/dev/disk/by-diskseq/2", - "partitions": [ - { - "label": "pmm-data", - "number": 1, - "resize": true, - "sizeMiB": 0 - } - ], - "wipeTable": false - } - ], - "filesystems": [ - { - "device": "/dev/disk/by-partlabel/pmm-data", - "format": "xfs", - "path": "/var/lib/pmm-data" - } - ] - }, - "systemd": { - "units": [ - { - "contents": "# Generated by Butane\n[Unit]\nBefore=local-fs.target\nRequires=systemd-fsck@dev-disk-by\\x2dpartlabel-pmm\\x2ddata.service\nAfter=systemd-fsck@dev-disk-by\\x2dpartlabel-pmm\\x2ddata.service\n\n[Mount]\nWhere=/var/lib/pmm-data\nWhat=/dev/disk/by-partlabel/pmm-data\nType=xfs\n\n[Install]\nRequiredBy=local-fs.target", - "enabled": true, - "name": "var-lib-pmm\\x2ddata.mount" - }, - { - "dropins": [ - { - "contents": "[Service]\n# Override Execstart in main unit\nExecStart=\n# Add new Execstart with `-` prefix to ignore failure\nExecStart=-/usr/sbin/agetty --autologin core --noclear %I $TERM\nTTYVTDisallocate=no\n", - "name": "autologin-core.conf" - } - ], - "name": "serial-getty@ttyS0.service" - }, - { - "contents": "[Service]\nType=oneshot\nExecStart=/usr/bin/false\nRemainAfterExit=yes\n\n[Install]\nWantedBy=multi-user.target\n", - "enabled": true, - "name": "failure.service" - }, - { - "contents": "[Unit]\nDescription=pmm-server\nWants=network-online.target\nAfter=network-online.target\n\n[Service]\nType=simple\n\n# set environment for this unit\nEnvironment=PMM_VOLUME_PATH=/var/lib/pmm-data/\nEnvironment=PMM_TAG=2.35.0\nEnvironment=PMM_IMAGE=docker.io/percona/pmm-server\n\n# optional env file that could override previous env settings for this unit\nEnvironmentFile=-/var/lib/pmm-data/env\n\nExecStart=/usr/bin/podman run --rm --replace=true --name=%N \\\n --network=host --ulimit=host \\\n --mount=type=bind,src=${PMM_VOLUME_PATH},dst=/srv,relabel=shared \\\n --health-cmd=none --health-interval=disable \\\n ${PMM_IMAGE}:${PMM_TAG}\nExecStop=/usr/bin/podman stop -t 10 %N\nRestart=on-failure\nRestartSec=20\n\n[Install]\nAlias=%N\nWantedBy=multi-user.target\n", - "enabled": true, - "name": "pmm-server.service" - } - ] - } -} diff --git a/managed/services/server/logs_test.go b/managed/services/server/logs_test.go index 313b72ebe2..0b75ade526 100644 --- a/managed/services/server/logs_test.go +++ b/managed/services/server/logs_test.go @@ -171,11 +171,6 @@ func TestFiles(t *testing.T) { files := l.files(ctx, nil, maxLogReadLines) actual := make([]string, 0, len(files)) for _, f := range files { - // present only after update - if f.Name == "pmm-update-perform.log" { - continue - } - if f.Name == "prometheus.base.yml" { assert.EqualError(t, f.Err, "open /srv/prometheus/prometheus.base.yml: no such file or directory") continue @@ -226,12 +221,8 @@ func TestZip(t *testing.T) { actual := make([]string, 0, len(r.File)) for _, f := range r.File { - // present only after update - if f.Name == "pmm-update-perform.log" { - continue - } - - // skip with dynamic IDs now @TODO use regex to match ~ "client/pmm-agent/NODE_EXPORTER 297b465c-a767-4bc5-809d-d394a83c7086.log" + // skip with dynamic IDs now + // TODO: use regex to match ~ "client/pmm-agent/NODE_EXPORTER 297b465c-a767-4bc5-809d-d394a83c7086.log" if strings.Contains(f.Name, "client/pmm-agent/") && f.Name != "client/pmm-agent/pmm-agent.log" { continue }