From f73583f4a1d666ea97f1cba2de9afb54c4604d97 Mon Sep 17 00:00:00 2001
From: Tahlia Richardson <3069029+tahliar@users.noreply.github.com>
Date: Thu, 14 Sep 2023 11:02:13 +1000
Subject: [PATCH 01/39] Remove installation content from admin guide
jsc#PED-2842
---
xml/book_administration.xml | 12 ------------
1 file changed, 12 deletions(-)
diff --git a/xml/book_administration.xml b/xml/book_administration.xml
index 9641bdd9..819f3fd7 100644
--- a/xml/book_administration.xml
+++ b/xml/book_administration.xml
@@ -55,18 +55,6 @@
-
-
-
-
- Installation and setup
-
-
-
-
-
-
-
From 9eb85ac8dba15dcc29af41bf21d0303ee2604452 Mon Sep 17 00:00:00 2001
From: Tahlia Richardson <3069029+tahliar@users.noreply.github.com>
Date: Thu, 14 Sep 2023 13:28:35 +1000
Subject: [PATCH 02/39] Add initial skeleton of full install guide
jsc#PED-2842
---
DC-SLE-HA-full-install | 25 +++++++++++
xml/MAIN.SLEHA.xml | 3 ++
xml/book_full_install.xml | 88 +++++++++++++++++++++++++++++++++++++++
xml/ha_install_intro.xml | 30 +++++++++++++
4 files changed, 146 insertions(+)
create mode 100644 DC-SLE-HA-full-install
create mode 100644 xml/book_full_install.xml
create mode 100644 xml/ha_install_intro.xml
diff --git a/DC-SLE-HA-full-install b/DC-SLE-HA-full-install
new file mode 100644
index 00000000..a8aa8ad6
--- /dev/null
+++ b/DC-SLE-HA-full-install
@@ -0,0 +1,25 @@
+## ----------------------------
+## Doc Config File for SUSE Linux Enterprise High Availability Extension
+## Full installation guide
+## ----------------------------
+##
+## Basics
+MAIN="MAIN.SLEHA.xml"
+ROOTID=book-full-install
+
+## Profiling
+PROFOS="sles"
+PROFCONDITION="suse-product"
+
+## stylesheet location
+STYLEROOT="/usr/share/xml/docbook/stylesheet/suse2022-ns"
+FALLBACK_STYLEROOT="/usr/share/xml/docbook/stylesheet/suse-ns"
+
+## enable sourcing
+export DOCCONF=$BASH_SOURCE
+
+##do not show remarks directly in the (PDF) text
+#XSLTPARAM="--param use.xep.annotate.pdf=0"
+
+### Sort the glossary
+XSLTPARAM="--param glossary.sort=1"
diff --git a/xml/MAIN.SLEHA.xml b/xml/MAIN.SLEHA.xml
index b452e831..b9b7b334 100644
--- a/xml/MAIN.SLEHA.xml
+++ b/xml/MAIN.SLEHA.xml
@@ -42,6 +42,9 @@
+
+
+
diff --git a/xml/book_full_install.xml b/xml/book_full_install.xml
new file mode 100644
index 00000000..93fa1734
--- /dev/null
+++ b/xml/book_full_install.xml
@@ -0,0 +1,88 @@
+
+
+
+ %entities;
+]>
+
+
+
+
+
+
+
+ Installing High Availability clusters for critical workloads
+ &productname;
+ &productnameshort;
+ &productnumber;
+
+
+
+
+
+
+ TBD
+
+
+
+
+ yes
+
+
+
+
+
+
+
+
+ Planning for deployment
+
+
+
+
+
+
+
+
+
+ Installing HA nodes
+
+
+
+
+
+
+
+
+
+ Additional configuration
+
+
+
+
+
+
+
+
+ Testing the setup
+
+
+
+
+
+
+
+
+
+
diff --git a/xml/ha_install_intro.xml b/xml/ha_install_intro.xml
new file mode 100644
index 00000000..dd655a3d
--- /dev/null
+++ b/xml/ha_install_intro.xml
@@ -0,0 +1,30 @@
+
+
+ %entities;
+]>
+
+
+ Preface
+
+
+
+ editing
+
+
+ yes
+
+
+
+
+
+
+
+
+
+
+
From 6b583b6fb788353a6173b90f8a1dbb11ba7a05a9 Mon Sep 17 00:00:00 2001
From: Tahlia Richardson <3069029+tahliar@users.noreply.github.com>
Date: Thu, 9 Nov 2023 17:19:20 +1000
Subject: [PATCH 03/39] Move Architecture section above Benefits
---
xml/ha_concepts.xml | 348 ++++++++++++++++++++++----------------------
1 file changed, 174 insertions(+), 174 deletions(-)
diff --git a/xml/ha_concepts.xml b/xml/ha_concepts.xml
index 7f97ae5b..6e400150 100644
--- a/xml/ha_concepts.xml
+++ b/xml/ha_concepts.xml
@@ -334,6 +334,180 @@
+
+ Architecture
+
+ This section provides a brief overview of &productname; architecture. It
+ identifies and provides information on the architectural components, and
+ describes how those components interoperate.
+
+
+
+ Architecture layers
+
+ &productname; has a layered architecture.
+ illustrates
+ the different layers and their associated components.
+
+
+
+
+ Membership and messaging layer (Corosync)
+
+ This component provides reliable messaging, membership, and quorum information
+ about the cluster. This is handled by the Corosync cluster engine, a group
+ communication system.
+
+
+
+ Cluster resource manager (Pacemaker)
+
+ Pacemaker as cluster resource manager is the brain
+ which reacts to events occurring in the cluster. It is implemented as
+ pacemaker-controld, the cluster
+ controller, which coordinates all actions. Events can be nodes that join
+ or leave the cluster, failure of resources, or scheduled activities such
+ as maintenance, for example.
+
+
+
+ Local resource manager
+
+
+
+ The local resource manager is located between the Pacemaker layer and the
+ resources layer on each node. It is implemented as pacemaker-execd daemon. Through this daemon,
+ Pacemaker can start, stop, and monitor resources.
+
+
+
+
+ Cluster Information Database (CIB)
+
+
+ On every node, Pacemaker maintains the cluster information database
+ (CIB). It is an XML representation of the cluster configuration
+ (including cluster options, nodes, resources, constraints and the
+ relationship to each other). The CIB also reflects the current cluster
+ status. Each cluster node contains a CIB replica, which is synchronized
+ across the whole cluster. The pacemaker-based
+ daemon takes care of reading and writing cluster configuration and
+ status.
+
+
+
+ Designated Coordinator (DC)
+
+
+ The DC is elected from all nodes in the cluster. This happens if there
+ is no DC yet or if the current DC leaves the cluster for any reason.
+ The DC is the only entity in the cluster that can decide that a
+ cluster-wide change needs to be performed, such as fencing a node or
+ moving resources around. All other nodes get their configuration and
+ resource allocation information from the current DC.
+
+
+
+
+ Policy Engine
+
+
+
+ The policy engine runs on every node, but the one on the DC is the active
+ one. The engine is implemented as
+ pacemaker-schedulerd daemon.
+ When a cluster transition is needed, based on the current state and
+ configuration, pacemaker-schedulerd
+ calculates the expected next state of the cluster. It determines what
+ actions need to be scheduled to achieve the next state.
+
+
+
+
+
+
+ Resources and resource agents
+
+ In a &ha; cluster, the services that need to be highly available are
+ called resources. Resource agents (RAs) are scripts that start, stop, and
+ monitor cluster resources.
+
+
+
+
+
+ Process flow
+
+ The pacemakerd daemon launches and
+ monitors all other related daemons. The daemon that coordinates all actions,
+ pacemaker-controld, has an instance on
+ each cluster node. Pacemaker centralizes all cluster decision-making by
+ electing one of those instances as a primary. Should the elected pacemaker-controld daemon fail, a new primary is
+ established.
+
+
+ Many actions performed in the cluster will cause a cluster-wide change.
+ These actions can include things like adding or removing a cluster
+ resource or changing resource constraints. It is important to understand
+ what happens in the cluster when you perform such an action.
+
+
+ For example, suppose you want to add a cluster IP address resource. To
+ do this, you can use the &crmshell; or the Web interface to modify the CIB.
+ It is not required to perform the actions on the DC.
+ You can use either tool on any node in the cluster and they will be
+ relayed to the DC. The DC will then replicate the CIB change to all
+ cluster nodes.
+
+
+ Based on the information in the CIB, the pacemaker-schedulerd then computes the ideal
+ state of the cluster and how it should be achieved. It feeds a list of
+ instructions to the DC. The DC sends commands via the messaging/infrastructure
+ layer which are received by the pacemaker-controld peers on
+ other nodes. Each of them uses its local resource agent executor (implemented
+ as pacemaker-execd) to perform
+ resource modifications. The pacemaker-execd is not cluster-aware and interacts
+ directly with resource agents.
+
+
+ All peer nodes report the results of their operations back to the DC.
+ After the DC concludes that all necessary operations are successfully
+ performed in the cluster, the cluster will go back to the idle state and
+ wait for further events. If any operation was not carried out as
+ planned, the pacemaker-schedulerd
+ is invoked again with the new information recorded in
+ the CIB.
+
+
+ In some cases, it might be necessary to power off nodes to protect shared
+ data or complete resource recovery. In a Pacemaker cluster, the implementation
+ of node level fencing is &stonith;. For this, Pacemaker comes with a
+ fencing subsystem, pacemaker-fenced.
+ &stonith; devices must be configured as cluster resources (that use
+ specific fencing agents), because this allows monitoring of the fencing devices.
+ When clients detect a failure, they send a request to pacemaker-fenced,
+ which then executes the fencing agent to bring down the node.
+
+
+ Benefits
@@ -624,179 +798,5 @@
-
- Architecture
-
-
- This section provides a brief overview of &productname; architecture. It
- identifies and provides information on the architectural components, and
- describes how those components interoperate.
-
-
-
- Architecture layers
-
- &productname; has a layered architecture.
- illustrates
- the different layers and their associated components.
-
-
-
-
- Membership and messaging layer (Corosync)
-
- This component provides reliable messaging, membership, and quorum information
- about the cluster. This is handled by the Corosync cluster engine, a group
- communication system.
-
-
-
- Cluster resource manager (Pacemaker)
-
- Pacemaker as cluster resource manager is the brain
- which reacts to events occurring in the cluster. It is implemented as
- pacemaker-controld, the cluster
- controller, which coordinates all actions. Events can be nodes that join
- or leave the cluster, failure of resources, or scheduled activities such
- as maintenance, for example.
-
-
-
- Local resource manager
-
-
-
- The local resource manager is located between the Pacemaker layer and the
- resources layer on each node. It is implemented as pacemaker-execd daemon. Through this daemon,
- Pacemaker can start, stop, and monitor resources.
-
-
-
-
- Cluster Information Database (CIB)
-
-
- On every node, Pacemaker maintains the cluster information database
- (CIB). It is an XML representation of the cluster configuration
- (including cluster options, nodes, resources, constraints and the
- relationship to each other). The CIB also reflects the current cluster
- status. Each cluster node contains a CIB replica, which is synchronized
- across the whole cluster. The pacemaker-based
- daemon takes care of reading and writing cluster configuration and
- status.
-
-
-
- Designated Coordinator (DC)
-
-
- The DC is elected from all nodes in the cluster. This happens if there
- is no DC yet or if the current DC leaves the cluster for any reason.
- The DC is the only entity in the cluster that can decide that a
- cluster-wide change needs to be performed, such as fencing a node or
- moving resources around. All other nodes get their configuration and
- resource allocation information from the current DC.
-
-
-
-
- Policy Engine
-
-
-
- The policy engine runs on every node, but the one on the DC is the active
- one. The engine is implemented as
- pacemaker-schedulerd daemon.
- When a cluster transition is needed, based on the current state and
- configuration, pacemaker-schedulerd
- calculates the expected next state of the cluster. It determines what
- actions need to be scheduled to achieve the next state.
-
-
-
-
-
-
- Resources and resource agents
-
- In a &ha; cluster, the services that need to be highly available are
- called resources. Resource agents (RAs) are scripts that start, stop, and
- monitor cluster resources.
-
-
-
-
- Process flow
-
- The pacemakerd daemon launches and
- monitors all other related daemons. The daemon that coordinates all actions,
- pacemaker-controld, has an instance on
- each cluster node. Pacemaker centralizes all cluster decision-making by
- electing one of those instances as a primary. Should the elected pacemaker-controld daemon fail, a new primary is
- established.
-
-
- Many actions performed in the cluster will cause a cluster-wide change.
- These actions can include things like adding or removing a cluster
- resource or changing resource constraints. It is important to understand
- what happens in the cluster when you perform such an action.
-
-
- For example, suppose you want to add a cluster IP address resource. To
- do this, you can use the &crmshell; or the Web interface to modify the CIB.
- It is not required to perform the actions on the DC.
- You can use either tool on any node in the cluster and they will be
- relayed to the DC. The DC will then replicate the CIB change to all
- cluster nodes.
-
-
- Based on the information in the CIB, the pacemaker-schedulerd then computes the ideal
- state of the cluster and how it should be achieved. It feeds a list of
- instructions to the DC. The DC sends commands via the messaging/infrastructure
- layer which are received by the pacemaker-controld peers on
- other nodes. Each of them uses its local resource agent executor (implemented
- as pacemaker-execd) to perform
- resource modifications. The pacemaker-execd is not cluster-aware and interacts
- directly with resource agents.
-
-
- All peer nodes report the results of their operations back to the DC.
- After the DC concludes that all necessary operations are successfully
- performed in the cluster, the cluster will go back to the idle state and
- wait for further events. If any operation was not carried out as
- planned, the pacemaker-schedulerd
- is invoked again with the new information recorded in
- the CIB.
-
-
- In some cases, it might be necessary to power off nodes to protect shared
- data or complete resource recovery. In a Pacemaker cluster, the implementation
- of node level fencing is &stonith;. For this, Pacemaker comes with a
- fencing subsystem, pacemaker-fenced.
- &stonith; devices must be configured as cluster resources (that use
- specific fencing agents), because this allows monitoring of the fencing devices.
- When clients detect a failure, they send a request to pacemaker-fenced,
- which then executes the fencing agent to bring down the node.
-
-
-
From f74a2c52251b8a605b50eeeae0f6d66cc89c491a Mon Sep 17 00:00:00 2001
From: Tahlia Richardson <3069029+tahliar@users.noreply.github.com>
Date: Thu, 9 Nov 2023 17:25:53 +1000
Subject: [PATCH 04/39] Move storage config examples into the architecture
section
---
xml/ha_concepts.xml | 165 ++++++++++++++++++++++----------------------
1 file changed, 82 insertions(+), 83 deletions(-)
diff --git a/xml/ha_concepts.xml b/xml/ha_concepts.xml
index 6e400150..347b2b6b 100644
--- a/xml/ha_concepts.xml
+++ b/xml/ha_concepts.xml
@@ -507,6 +507,88 @@
which then executes the fencing agent to bring down the node.
+
+ Cluster configurations: storage
+
+
+ Cluster configurations with &productname; might or might not include a
+ shared disk subsystem. The shared disk subsystem can be connected via
+ high-speed Fibre Channel cards, cables, and switches, or it can be
+ configured to use iSCSI. If a node fails, another designated node in
+ the cluster automatically mounts the shared disk directories that were
+ previously mounted on the failed node. This gives network users
+ continuous access to the directories on the shared disk subsystem.
+
+
+
+ Shared disk subsystem with LVM
+
+ When using a shared disk subsystem with LVM, that subsystem must be
+ connected to all servers in the cluster from which it needs to be
+ accessed.
+
+
+
+
+ Typical resources might include data, applications, and services. The
+ following figures show how a typical Fibre Channel cluster configuration
+ might look.
+ The green lines depict connections to an Ethernet power switch. Such
+ a device can be controlled over a network and can reboot
+ a node when a ping request fails.
+
+
+
+
+
+ Although Fibre Channel provides the best performance, you can also
+ configure your cluster to use iSCSI. iSCSI is an alternative to Fibre
+ Channel that can be used to create a low-cost Storage Area Network (SAN).
+ The following figure shows how a typical iSCSI cluster configuration
+ might look.
+
+
+
+
+
+ Although most clusters include a shared disk subsystem, it is also
+ possible to create a cluster without a shared disk subsystem. The
+ following figure shows how a cluster without a shared disk subsystem
+ might look.
+
+
+
+ Benefits
@@ -716,87 +798,4 @@
or increasing performance or accessibility of the Web sites.
-
- Cluster configurations: storage
-
-
- Cluster configurations with &productname; might or might not include a
- shared disk subsystem. The shared disk subsystem can be connected via
- high-speed Fibre Channel cards, cables, and switches, or it can be
- configured to use iSCSI. If a node fails, another designated node in
- the cluster automatically mounts the shared disk directories that were
- previously mounted on the failed node. This gives network users
- continuous access to the directories on the shared disk subsystem.
-
-
-
- Shared disk subsystem with LVM
-
- When using a shared disk subsystem with LVM, that subsystem must be
- connected to all servers in the cluster from which it needs to be
- accessed.
-
-
-
-
- Typical resources might include data, applications, and services. The
- following figures show how a typical Fibre Channel cluster configuration
- might look.
- The green lines depict connections to an Ethernet power switch. Such
- a device can be controlled over a network and can reboot
- a node when a ping request fails.
-
-
-
-
-
- Although Fibre Channel provides the best performance, you can also
- configure your cluster to use iSCSI. iSCSI is an alternative to Fibre
- Channel that can be used to create a low-cost Storage Area Network (SAN).
- The following figure shows how a typical iSCSI cluster configuration
- might look.
-
-
-
-
-
- Although most clusters include a shared disk subsystem, it is also
- possible to create a cluster without a shared disk subsystem. The
- following figure shows how a cluster without a shared disk subsystem
- might look.
-
-
-
-
-
From 9f3e56f39a1efb81955a7d50d2032b3fac045baa Mon Sep 17 00:00:00 2001
From: Tahlia Richardson <3069029+tahliar@users.noreply.github.com>
Date: Thu, 9 Nov 2023 17:33:50 +1000
Subject: [PATCH 05/39] Remove xref to the glossary
It's no longer in the same guide,
and weirdly, the xref resulted in a blank space
instead of the Admin Guide's title.
---
xml/ha_concepts.xml | 6 +-----
1 file changed, 1 insertion(+), 5 deletions(-)
diff --git a/xml/ha_concepts.xml b/xml/ha_concepts.xml
index 347b2b6b..4475e617 100644
--- a/xml/ha_concepts.xml
+++ b/xml/ha_concepts.xml
@@ -30,11 +30,7 @@
overview of the architecture, describing the individual architecture
layers and processes within the cluster.
-
- For explanations of some common terms used in the context of &ha;
- clusters, refer to .
-
-
+
editing
From de27d8ba698dc28b768a2c051947a2f2a4a8338e Mon Sep 17 00:00:00 2001
From: Tahlia Richardson <3069029+tahliar@users.noreply.github.com>
Date: Thu, 9 Nov 2023 17:49:32 +1000
Subject: [PATCH 06/39] Move Architecture back to the end of the chapter
Oops I changed my mind
---
xml/ha_concepts.xml | 418 ++++++++++++++++++++++----------------------
1 file changed, 209 insertions(+), 209 deletions(-)
diff --git a/xml/ha_concepts.xml b/xml/ha_concepts.xml
index 4475e617..69a7e2d4 100644
--- a/xml/ha_concepts.xml
+++ b/xml/ha_concepts.xml
@@ -330,7 +330,215 @@
-
+
+ Benefits
+
+
+ &productname; allows you to configure up to 32 Linux servers into a
+ high-availability cluster (HA cluster). Resources can be
+ dynamically switched or moved to any node in the cluster. Resources can
+ be configured to automatically migrate if a node fails, or they can be
+ moved manually to troubleshoot hardware or balance the workload.
+
+
+
+ &productname; provides high availability from commodity components. Lower
+ costs are obtained through the consolidation of applications and
+ operations onto a cluster. &productname; also allows you to centrally
+ manage the complete cluster. You can adjust resources to meet changing
+ workload requirements (thus, manually load balance the
+ cluster). Allowing clusters of more than two nodes also provides savings
+ by allowing several nodes to share a hot spare.
+
+
+
+ An equally important benefit is the potential reduction of unplanned
+ service outages and planned outages for software and hardware
+ maintenance and upgrades.
+
+
+
+ Reasons that you would want to implement a cluster include:
+
+
+
+
+
+ Increased availability
+
+
+
+
+ Improved performance
+
+
+
+
+ Low cost of operation
+
+
+
+
+ Scalability
+
+
+
+
+ Disaster recovery
+
+
+
+
+ Data protection
+
+
+
+
+ Server consolidation
+
+
+
+
+ Storage consolidation
+
+
+
+
+
+ Shared disk fault tolerance can be obtained by implementing RAID on the
+ shared disk subsystem.
+
+
+
+ The following scenario illustrates some benefits &productname; can
+ provide.
+
+
+ Example cluster scenario
+
+
+ Suppose you have configured a three-node cluster, with a Web server
+ installed on each of the three nodes in the cluster. Each of the
+ nodes in the cluster hosts two Web sites. All the data, graphics, and
+ Web page content for each Web site are stored on a shared disk subsystem
+ connected to each of the nodes in the cluster. The following figure
+ depicts how this setup might look.
+
+
+
+
+
+ During normal cluster operation, each node is in constant communication
+ with the other nodes in the cluster and performs periodic polling of
+ all registered resources to detect failure.
+
+
+
+ Suppose Web Server 1 experiences hardware or software problems and the
+ users depending on Web Server 1 for Internet access, e-mail, and
+ information lose their connections. The following figure shows how
+ resources are moved when Web Server 1 fails.
+
+
+
+
+
+ Web Site A moves to Web Server 2 and Web Site B moves to Web Server 3. IP
+ addresses and certificates also move to Web Server 2 and Web Server 3.
+
+
+
+ When you configured the cluster, you decided where the Web sites hosted
+ on each Web server would go should a failure occur. In the previous
+ example, you configured Web Site A to move to Web Server 2 and Web Site B
+ to move to Web Server 3. This way, the workload formerly handled by Web
+ Server 1 continues to be available and is evenly distributed between any
+ surviving cluster members.
+
+
+
+ When Web Server 1 failed, the &ha; software did the following:
+
+
+
+
+
+ Detected a failure and verified with &stonith; that Web Server 1 was
+ really dead. &stonith; is an acronym for Shoot The Other Node
+ In The Head. It is a means of bringing down misbehaving nodes
+ to prevent them from causing trouble in the cluster.
+
+
+
+
+ Remounted the shared data directories that were formerly mounted on Web
+ server 1 on Web Server 2 and Web Server 3.
+
+
+
+
+ Restarted applications that were running on Web Server 1 on Web Server
+ 2 and Web Server 3.
+
+
+
+
+ Transferred IP addresses to Web Server 2 and Web Server 3.
+
+
+
+
+
+ In this example, the failover process happened quickly and users regained
+ access to Web site information within seconds, usually without needing to
+ log in again.
+
+
+
+ Now suppose the problems with Web Server 1 are resolved, and Web Server 1
+ is returned to a normal operating state. Web Site A and Web Site B can
+ either automatically fail back (move back) to Web Server 1, or they can
+ stay where they are. This depends on how you configured the resources for
+ them. Migrating the services back to Web Server 1 will incur some
+ down-time. Therefore &productname; also allows you to defer the migration until
+ a period when it will cause little or no service interruption. There are
+ advantages and disadvantages to both alternatives.
+
+
+
+ &productname; also provides resource migration capabilities. You can move
+ applications, Web sites, etc. to other servers in your cluster as
+ required for system management.
+
+
+
+ For example, you could have manually moved Web Site A or Web Site B from
+ Web Server 1 to either of the other servers in the cluster. Use cases for
+ this are upgrading or performing scheduled maintenance on Web Server 1,
+ or increasing performance or accessibility of the Web sites.
+
+
+ Architecture
This section provides a brief overview of &productname; architecture. It
@@ -586,212 +794,4 @@
-
- Benefits
-
-
- &productname; allows you to configure up to 32 Linux servers into a
- high-availability cluster (HA cluster). Resources can be
- dynamically switched or moved to any node in the cluster. Resources can
- be configured to automatically migrate if a node fails, or they can be
- moved manually to troubleshoot hardware or balance the workload.
-
-
-
- &productname; provides high availability from commodity components. Lower
- costs are obtained through the consolidation of applications and
- operations onto a cluster. &productname; also allows you to centrally
- manage the complete cluster. You can adjust resources to meet changing
- workload requirements (thus, manually load balance the
- cluster). Allowing clusters of more than two nodes also provides savings
- by allowing several nodes to share a hot spare.
-
-
-
- An equally important benefit is the potential reduction of unplanned
- service outages and planned outages for software and hardware
- maintenance and upgrades.
-
-
-
- Reasons that you would want to implement a cluster include:
-
-
-
-
-
- Increased availability
-
-
-
-
- Improved performance
-
-
-
-
- Low cost of operation
-
-
-
-
- Scalability
-
-
-
-
- Disaster recovery
-
-
-
-
- Data protection
-
-
-
-
- Server consolidation
-
-
-
-
- Storage consolidation
-
-
-
-
-
- Shared disk fault tolerance can be obtained by implementing RAID on the
- shared disk subsystem.
-
-
-
- The following scenario illustrates some benefits &productname; can
- provide.
-
-
- Example cluster scenario
-
-
- Suppose you have configured a three-node cluster, with a Web server
- installed on each of the three nodes in the cluster. Each of the
- nodes in the cluster hosts two Web sites. All the data, graphics, and
- Web page content for each Web site are stored on a shared disk subsystem
- connected to each of the nodes in the cluster. The following figure
- depicts how this setup might look.
-
-
-
-
-
- During normal cluster operation, each node is in constant communication
- with the other nodes in the cluster and performs periodic polling of
- all registered resources to detect failure.
-
-
-
- Suppose Web Server 1 experiences hardware or software problems and the
- users depending on Web Server 1 for Internet access, e-mail, and
- information lose their connections. The following figure shows how
- resources are moved when Web Server 1 fails.
-
-
-
-
-
- Web Site A moves to Web Server 2 and Web Site B moves to Web Server 3. IP
- addresses and certificates also move to Web Server 2 and Web Server 3.
-
-
-
- When you configured the cluster, you decided where the Web sites hosted
- on each Web server would go should a failure occur. In the previous
- example, you configured Web Site A to move to Web Server 2 and Web Site B
- to move to Web Server 3. This way, the workload formerly handled by Web
- Server 1 continues to be available and is evenly distributed between any
- surviving cluster members.
-
-
-
- When Web Server 1 failed, the &ha; software did the following:
-
-
-
-
-
- Detected a failure and verified with &stonith; that Web Server 1 was
- really dead. &stonith; is an acronym for Shoot The Other Node
- In The Head. It is a means of bringing down misbehaving nodes
- to prevent them from causing trouble in the cluster.
-
-
-
-
- Remounted the shared data directories that were formerly mounted on Web
- server 1 on Web Server 2 and Web Server 3.
-
-
-
-
- Restarted applications that were running on Web Server 1 on Web Server
- 2 and Web Server 3.
-
-
-
-
- Transferred IP addresses to Web Server 2 and Web Server 3.
-
-
-
-
-
- In this example, the failover process happened quickly and users regained
- access to Web site information within seconds, usually without needing to
- log in again.
-
-
-
- Now suppose the problems with Web Server 1 are resolved, and Web Server 1
- is returned to a normal operating state. Web Site A and Web Site B can
- either automatically fail back (move back) to Web Server 1, or they can
- stay where they are. This depends on how you configured the resources for
- them. Migrating the services back to Web Server 1 will incur some
- down-time. Therefore &productname; also allows you to defer the migration until
- a period when it will cause little or no service interruption. There are
- advantages and disadvantages to both alternatives.
-
-
-
- &productname; also provides resource migration capabilities. You can move
- applications, Web sites, etc. to other servers in your cluster as
- required for system management.
-
-
-
- For example, you could have manually moved Web Site A or Web Site B from
- Web Server 1 to either of the other servers in the cluster. Use cases for
- this are upgrading or performing scheduled maintenance on Web Server 1,
- or increasing performance or accessibility of the Web sites.
-
-
From 3830285fd4521f51dcdfb2ebe0cb8fb53b014773 Mon Sep 17 00:00:00 2001
From: Tahlia Richardson <3069029+tahliar@users.noreply.github.com>
Date: Fri, 10 Nov 2023 16:29:59 +1000
Subject: [PATCH 07/39] Add 'Starting the' to 'YaST Cluster module'
---
xml/ha_yast_cluster.xml | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/xml/ha_yast_cluster.xml b/xml/ha_yast_cluster.xml
index dd73e56a..56af2b22 100644
--- a/xml/ha_yast_cluster.xml
+++ b/xml/ha_yast_cluster.xml
@@ -204,13 +204,13 @@
- &yast; Cluster module
+ Starting the &yast; Cluster module
Start &yast; and select &ha;Cluster. Alternatively, start the
- module from command line:
+ module from the command line:
- sudo yast2 cluster
+ &prompt.user;sudo yast2 cluster
The following list shows an overview of the available screens in the
From a21c4d07f5ab1623d666d2253946abab34011d9f Mon Sep 17 00:00:00 2001
From: Tahlia Richardson <3069029+tahliar@users.noreply.github.com>
Date: Wed, 24 Jan 2024 14:10:40 +1000
Subject: [PATCH 08/39] Add (empty) new chapter files
---
xml/book_full_install.xml | 3 +++
xml/ha_add_nodes.xml | 28 ++++++++++++++++++++++++++++
xml/ha_bootstrap_install.xml | 28 ++++++++++++++++++++++++++++
xml/ha_installation_overview.xml | 28 ++++++++++++++++++++++++++++
4 files changed, 87 insertions(+)
create mode 100644 xml/ha_add_nodes.xml
create mode 100644 xml/ha_bootstrap_install.xml
create mode 100644 xml/ha_installation_overview.xml
diff --git a/xml/book_full_install.xml b/xml/book_full_install.xml
index 93fa1734..ae670e77 100644
--- a/xml/book_full_install.xml
+++ b/xml/book_full_install.xml
@@ -46,6 +46,7 @@
+
@@ -55,7 +56,9 @@
Installing HA nodes
+
+
diff --git a/xml/ha_add_nodes.xml b/xml/ha_add_nodes.xml
new file mode 100644
index 00000000..74b1d7ef
--- /dev/null
+++ b/xml/ha_add_nodes.xml
@@ -0,0 +1,28 @@
+
+
+
+ %entities;
+]>
+
+
+ Adding more nodes
+
+
+
+
+
+
+
+
+ yes
+
+
+
+
diff --git a/xml/ha_bootstrap_install.xml b/xml/ha_bootstrap_install.xml
new file mode 100644
index 00000000..cf362e14
--- /dev/null
+++ b/xml/ha_bootstrap_install.xml
@@ -0,0 +1,28 @@
+
+
+
+ %entities;
+]>
+
+
+ Using the bootstrap script
+
+
+
+
+
+
+
+
+ yes
+
+
+
+
diff --git a/xml/ha_installation_overview.xml b/xml/ha_installation_overview.xml
new file mode 100644
index 00000000..123d12cc
--- /dev/null
+++ b/xml/ha_installation_overview.xml
@@ -0,0 +1,28 @@
+
+
+
+ %entities;
+]>
+
+
+ Installation overview
+
+
+
+You can also use a combination of both setup methods, for example: set up one node with YaST cluster and then use one of the bootstrap scripts to integrate more nodes (or vice versa).
+
+
+
+
+ yes
+
+
+
+
From 4805b2aa36182dc3284990456f2d84a191ba6afe Mon Sep 17 00:00:00 2001
From: Tahlia Richardson <3069029+tahliar@users.noreply.github.com>
Date: Wed, 24 Jan 2024 16:33:51 +1000
Subject: [PATCH 09/39] Move autoyast to new Add Nodes chapter
---
xml/ha_add_nodes.xml | 129 +++++++++++++++++++++++++++++++++++++++
xml/ha_install.xml | 141 -------------------------------------------
2 files changed, 129 insertions(+), 141 deletions(-)
diff --git a/xml/ha_add_nodes.xml b/xml/ha_add_nodes.xml
index 74b1d7ef..23194009 100644
--- a/xml/ha_add_nodes.xml
+++ b/xml/ha_add_nodes.xml
@@ -25,4 +25,133 @@
+
+
+ Adding nodes with &ay;
+
+
+ After you have installed and set up a two-node cluster, you can extend the
+ cluster by cloning existing nodes with &ay; and adding the clones to the cluster.
+
+
+ &ay; uses profiles that contain installation and configuration data.
+ A profile tells &ay; what to install and how to configure the installed system to
+ get a ready-to-use system in the end. This profile can then be used
+ for mass deployment in different ways (for example, to clone existing
+ cluster nodes).
+
+
+ For detailed instructions on how to use &ay; in various scenarios,
+ see the
+ &ayguide; for &sls; &productnumber;.
+
+
+
+ Identical hardware
+
+ assumes you are rolling
+ out &productname; &productnumber; to a set of machines with identical hardware
+ configurations.
+
+
+ If you need to deploy cluster nodes on non-identical hardware, refer to the
+ &deploy; for &sls; &productnumber;,
+ chapter Automated Installation, section
+ Rule-Based Autoinstallation.
+
+
+
+
+ Cloning a cluster node with &ay;
+
+
+ Make sure the node you want to clone is correctly installed and
+ configured. For details, see the &haquick; or
+ .
+
+
+
+
+ Follow the description outlined in the &sle;
+ &productnumber; &deploy; for simple mass
+ installation. This includes the following basic steps:
+
+
+
+
+ Creating an &ay; profile. Use the &ay; GUI to create and modify
+ a profile based on the existing system configuration. In &ay;,
+ choose the &ha; module and click the
+ Clone button. If needed, adjust the configuration
+ in the other modules and save the resulting control file as XML.
+
+
+ If you have configured DRBD, you can select and clone this module in
+ the &ay; GUI, too.
+
+
+
+
+ Determining the source of the &ay; profile and the parameter to
+ pass to the installation routines for the other nodes.
+
+
+
+
+ Determining the source of the &sls; and &productname;
+ installation data.
+
+
+
+
+ Determining and setting up the boot scenario for autoinstallation.
+
+
+
+
+ Passing the command line to the installation routines, either by
+ adding the parameters manually or by creating an
+ info file.
+
+
+
+
+ Starting and monitoring the autoinstallation process.
+
+
+
+
+
+
+
+ After the clone has been successfully installed, execute the following
+ steps to make the cloned node join the cluster:
+
+
+
+ Bringing the cloned node online
+
+
+ Transfer the key configuration files from the already configured nodes
+ to the cloned node with &csync; as described in
+ .
+
+
+
+
+ To bring the node online, start the cluster services on the cloned
+ node as described in .
+
+
+
+
+
+ The cloned node now joins the cluster because the
+ /etc/corosync/corosync.conf file has been applied to
+ the cloned node via &csync;. The CIB is automatically synchronized
+ among the cluster nodes.
+
+
+
diff --git a/xml/ha_install.xml b/xml/ha_install.xml
index 806464d1..1cd874da 100644
--- a/xml/ha_install.xml
+++ b/xml/ha_install.xml
@@ -20,10 +20,6 @@
have the same packages installed and the same system configuration as the
original ones.
-
- If you want to upgrade an existing cluster that runs an older version of
- &productname;, refer to .
-
@@ -45,142 +41,5 @@
basic two-node cluster.
-
- Mass installation and deployment with &ay;
-
-
- After you have installed and set up a two-node cluster, you can extend the
- cluster by cloning existing nodes with &ay; and adding the clones to the cluster.
-
-
- &ay; uses profiles that contains installation and configuration data.
- A profile tells &ay; what to install and how to configure the installed system to
- get a ready-to-use system in the end. This profile can then be used
- for mass deployment in different ways (for example, to clone existing
- cluster nodes).
-
-
- For detailed instructions on how to use &ay; in various scenarios,
- see the
- &ayguide; for &sls; &productnumber;.
-
-
-
- Identical hardware
-
- assumes you are rolling
- out &productname; &productnumber; to a set of machines with identical hardware
- configurations.
-
-
- If you need to deploy cluster nodes on non-identical hardware, refer to the
- &deploy; for &sls; &productnumber;,
- chapter Automated Installation, section
- Rule-Based Autoinstallation.
-
-
-
-
-
-
- Cloning a cluster node with &ay;
-
-
- Make sure the node you want to clone is correctly installed and
- configured. For details, see the &haquick; or
- .
-
-
-
-
- Follow the description outlined in the &sle;
- &productnumber; &deploy; for simple mass
- installation. This includes the following basic steps:
-
-
-
-
- Creating an &ay; profile. Use the &ay; GUI to create and modify
- a profile based on the existing system configuration. In &ay;,
- choose the &ha; module and click the
- Clone button. If needed, adjust the configuration
- in the other modules and save the resulting control file as XML.
-
-
-
- If you have configured DRBD, you can select and clone this module in
- the &ay; GUI, too.
-
-
-
-
- Determining the source of the &ay; profile and the parameter to
- pass to the installation routines for the other nodes.
-
-
-
-
- Determining the source of the &sls; and &productname;
- installation data.
-
-
-
-
- Determining and setting up the boot scenario for autoinstallation.
-
-
-
-
- Passing the command line to the installation routines, either by
- adding the parameters manually or by creating an
- info file.
-
-
-
-
- Starting and monitoring the autoinstallation process.
-
-
-
-
-
-
-
- After the clone has been successfully installed, execute the following
- steps to make the cloned node join the cluster:
-
-
-
- Bringing the cloned node online
-
-
- Transfer the key configuration files from the already configured nodes
- to the cloned node with &csync; as described in
- .
-
-
-
-
- To bring the node online, start the cluster services on the cloned
- node as described in .
-
-
-
-
-
- The cloned node now joins the cluster because the
- /etc/corosync/corosync.conf file has been applied to
- the cloned node via &csync;. The CIB is automatically synchronized
- among the cluster nodes.
-
-
From 843df987201b3835152686fef1dfc7993291bee6 Mon Sep 17 00:00:00 2001
From: Tahlia Richardson <3069029+tahliar@users.noreply.github.com>
Date: Wed, 31 Jan 2024 16:22:01 +1000
Subject: [PATCH 10/39] Moving some things around
---
xml/book_full_install.xml | 2 +-
xml/ha_add_nodes.xml | 13 ++++++++
xml/ha_install.xml | 57 ++++++++++++++++++++++++--------
xml/ha_installation_overview.xml | 22 ++++++++++++
xml/ha_yast_cluster.xml | 33 +++++-------------
5 files changed, 88 insertions(+), 39 deletions(-)
diff --git a/xml/book_full_install.xml b/xml/book_full_install.xml
index ae670e77..ef60c553 100644
--- a/xml/book_full_install.xml
+++ b/xml/book_full_install.xml
@@ -53,7 +53,7 @@
- Installing HA nodes
+ Installing cluster nodes
diff --git a/xml/ha_add_nodes.xml b/xml/ha_add_nodes.xml
index 23194009..c3fa6035 100644
--- a/xml/ha_add_nodes.xml
+++ b/xml/ha_add_nodes.xml
@@ -25,6 +25,19 @@
+
+ Adding nodes with crm cluster join
+
+
+
+
+
+
+ Adding nodes manually
+
+
+
+ Adding nodes with &ay;
diff --git a/xml/ha_install.xml b/xml/ha_install.xml
index 1cd874da..5d489538 100644
--- a/xml/ha_install.xml
+++ b/xml/ha_install.xml
@@ -4,21 +4,19 @@
%entities;
]>
-
+
-
+
Installing &productname;
- If you are setting up a &ha; cluster with &productnamereg; for the first time, the
- easiest way is to start with a basic two-node cluster. You can also use the
- two-node cluster to run some tests. Afterward, you can add more
- nodes by cloning existing cluster nodes with &ay;. The cloned nodes will
- have the same packages installed and the same system configuration as the
- original ones.
+
+ The packages for configuring and managing a cluster are included in the &ha; installation pattern.
+ This pattern is only available after the &productname; extension (&slehaa;) is installed.
+ &slehaa; can be installed along with &sles; (&slsa;), or after &slsa; is already installed.
@@ -33,13 +31,44 @@
-
- Manual installation
- For the manual installation of the packages for &ha; refer to
- . It leads you through the setup of a
- basic two-node cluster.
+ To install &slehaa; along with &slsa;, see the
+
+ &deploy; for &sles;.
+ To install &slehaa; after &slsa; is already installed, use this procedure:
-
+
+ Requirements
+
+
+ &sles; is installed and registered with the &scc;.
+
+
+
+
+ You have an additional registration code for &productname;.
+
+
+
+
+ Installing the &ha; packages
+
+
+ Enable the &ha; extension:
+
+&prompt.user;sudo SUSEConnect -p sle-ha/&product-ga;.&product-sp;/x86_64 -r ADDITIONAL_REGCODE
+
+
+
+ Install the &ha; pattern:
+&prompt.user;sudo zypper install -t pattern ha_sles
+
+
+
+ Install the &ha; pattern on all machines that
+ will be part of your cluster.
+
+
+
diff --git a/xml/ha_installation_overview.xml b/xml/ha_installation_overview.xml
index 123d12cc..f921b6dd 100644
--- a/xml/ha_installation_overview.xml
+++ b/xml/ha_installation_overview.xml
@@ -24,5 +24,27 @@ You can also use a combination of both setup methods, for example: set up one no
yes
+
+ If you are setting up a &ha; cluster with &productnamereg; for the first time, the
+ easiest way is to start with a basic two-node cluster. You can also use the
+ two-node cluster to run some tests. Afterward, you can add more
+ nodes by cloning existing cluster nodes with &ay;. The cloned nodes will
+ have the same packages installed and the same system configuration as the
+ original ones.
+
+
+
+ Workflow options
+
+
+
+
+
+
+ Preconfiguration options
+
+
+
+
diff --git a/xml/ha_yast_cluster.xml b/xml/ha_yast_cluster.xml
index 56af2b22..2e2b3688 100644
--- a/xml/ha_yast_cluster.xml
+++ b/xml/ha_yast_cluster.xml
@@ -16,17 +16,6 @@
The &yast; cluster module allows you to set up a cluster manually
(from scratch) or to modify options for an existing cluster.
-
- However, if you prefer an automated approach for setting up a cluster,
- refer to . It describes how to install the
- needed packages and leads you to a basic two-node cluster, which is
- set up with the bootstrap scripts provided by the &crmshell;.
-
-
- You can also use a combination of both setup methods, for example: set up
- one node with &yast; cluster and then use one of the bootstrap scripts
- to integrate more nodes (or vice versa).
-
@@ -211,7 +200,12 @@
module from the command line:
&prompt.user;sudo yast2 cluster
-
+
+ If you start the cluster module for the first time, it appears as a
+ wizard, guiding you through all the steps necessary for basic setup.
+ Otherwise, click the categories on the left panel to access the
+ configuration options for each step.
+
The following list shows an overview of the available screens in the
&yast; cluster module. It also mentions whether the screen contains parameters that
@@ -229,9 +223,8 @@
Redundant communication pathsFor a supported cluster setup, two or more redundant communication
- paths are required. The preferred way is to use network device bonding as
- described in .
- If this is impossible, you need to define a second communication
+ paths are required. The preferred way is to use network device bonding.
+ If this is impossible, you must define a second communication
channel in &corosync;.
@@ -279,14 +272,6 @@
-
-
- If you start the cluster module for the first time, it appears as a
- wizard, guiding you through all the steps necessary for basic setup.
- Otherwise, click the categories on the left panel to access the
- configuration options for each step.
-
-
Settings in the &yast; Cluster moduleCertain settings in the &yast; cluster module apply only to the
@@ -1036,7 +1021,7 @@ Finished with 1 errors.
crm status command. If all nodes are
online, the output should be similar to the following:
-&prompt.root;crm status
+&prompt.root;crm status
Cluster Summary:
* Stack: corosync
* Current DC: &node1; (version ...) - partition with quorum
From 0693f98e0758d30a1e2db703c629a30cda95c9b3 Mon Sep 17 00:00:00 2001
From: Tahlia Richardson <3069029+tahliar@users.noreply.github.com>
Date: Wed, 31 Jan 2024 17:42:54 +1000
Subject: [PATCH 11/39] Add watchdog procedures
---
xml/book_full_install.xml | 1 +
xml/ha_sbd_watchdog.xml | 216 ++++++++++++++++++++++++++++++++++++++
2 files changed, 217 insertions(+)
create mode 100644 xml/ha_sbd_watchdog.xml
diff --git a/xml/book_full_install.xml b/xml/book_full_install.xml
index ef60c553..63acf620 100644
--- a/xml/book_full_install.xml
+++ b/xml/book_full_install.xml
@@ -56,6 +56,7 @@
Installing cluster nodes
+
diff --git a/xml/ha_sbd_watchdog.xml b/xml/ha_sbd_watchdog.xml
new file mode 100644
index 00000000..df3848d7
--- /dev/null
+++ b/xml/ha_sbd_watchdog.xml
@@ -0,0 +1,216 @@
+
+
+
+ %entities;
+]>
+
+
+ Setting up a watchdog for SBD
+
+
+
+ If you are using SBD as your &stonith; device, you must enable a watchdog on each
+ cluster node. If you are using a different &stonith; device, you can skip this chapter.
+
+
+
+
+ yes
+
+
+
+
+
+
+ &productname; ships with several kernel modules that provide hardware-specific watchdog drivers.
+ For clusters in production environments, we recommend using a hardware watchdog.
+ However, if no watchdog matches your hardware, the software watchdog
+ (softdog) can be used instead.
+
+
+ &productname; uses the SBD daemon as the software component that feeds the watchdog.
+
+
+
+ Using a hardware watchdog
+
+ Finding the right watchdog kernel module for a given system is not
+ trivial. Automatic probing fails often. As a result, many modules
+ are already loaded before the right one gets a chance.
+
+ The following table lists some commonly used watchdog drivers. However, this is
+ not a complete list of supported drivers. If your hardware is not listed here,
+ you can also find a list of choices in the following directories:
+
+
+
+
+ /lib/modules/KERNEL_VERSION/kernel/drivers/watchdog
+
+
+
+
+ /lib/modules/KERNEL_VERSION/kernel/drivers/ipmi
+
+
+
+
+ Alternatively, ask your hardware or
+ system vendor for details on system-specific watchdog configuration.
+
+
+ Commonly used watchdog drivers
+
+
+
+ Hardware
+ Driver
+
+
+
+
+ HP
+ hpwdt
+
+
+ Dell, Lenovo (Intel TCO)
+ iTCO_wdt
+
+
+ Fujitsu
+ ipmi_watchdog
+
+
+ LPAR on IBM Power
+ pseries-wdt
+
+
+ VM on IBM z/VM
+ vmwatchdog
+
+
+ Xen VM (DomU)
+ xen_xdt
+
+
+ VM on VMware vSphere
+ wdat_wdt
+
+
+ Generic
+ softdog
+
+
+
+
+
+ Accessing the watchdog timer
+
+ Some hardware vendors ship systems management software that uses the
+ watchdog for system resets (for example, HP ASR daemon). If the watchdog is
+ used by SBD, disable such software. No other software must access the
+ watchdog timer.
+
+
+
+ Loading the correct kernel module
+
+
+ List the drivers that are installed with your kernel version:
+
+&prompt.root;rpm -ql kernel-VERSION | grep watchdog
+
+
+
+ List any watchdog modules that are currently loaded in the kernel:
+
+&prompt.root;lsmod | egrep "(wd|dog)"
+
+
+
+ If you get a result, unload the wrong module:
+
+&prompt.root;rmmod WRONG_MODULE
+
+
+
+ Enable the watchdog module that matches your hardware:
+
+&prompt.root;echo WATCHDOG_MODULE > /etc/modules-load.d/watchdog.conf
+&prompt.root;systemctl restart systemd-modules-load
+
+
+
+ Test whether the watchdog module is loaded correctly:
+
+&prompt.root;lsmod | grep dog
+
+
+
+ Verify if the watchdog device is available:
+
+&prompt.root;ls -l /dev/watchdog*
+&prompt.root;sbd query-watchdog
+
+ If the watchdog device is not available, check the module name and options.
+ Maybe use another driver.
+
+
+
+
+ Verify if the watchdog device works:
+
+&prompt.root;sbd -w WATCHDOG_DEVICE test-watchdog
+
+
+
+ Reboot your machine to make sure there are no conflicting kernel modules. For example,
+ if you find the message cannot register ... in your log, this would indicate
+ such conflicting modules. To ignore such modules, refer to
+ .
+
+
+
+
+
+
+ Using the software watchdog (softdog)
+
+ For clusters in production environments, we recommend using a hardware-specific watchdog
+ driver. However, if no watchdog matches your hardware,
+ softdog can be used instead.
+
+
+ Softdog limitations
+
+ The softdog driver assumes that at least one CPU is still running. If all CPUs are stuck,
+ the code in the softdog driver that should reboot the system is never executed.
+ In contrast, hardware watchdogs keep working even if all CPUs are stuck.
+
+
+
+ Loading the softdog kernel module
+
+
+ Enable the softdog watchdog:
+
+&prompt.root;echo softdog > /etc/modules-load.d/watchdog.conf
+&prompt.root;systemctl restart systemd-modules-load
+
+
+
+ Check whether the softdog watchdog module is loaded correctly:
+
+&prompt.root;lsmod | grep softdog
+
+
+
+
+
From ab84656431109d06f247a66c1983315327e13714 Mon Sep 17 00:00:00 2001
From: Tahlia Richardson <3069029+tahliar@users.noreply.github.com>
Date: Wed, 7 Feb 2024 14:35:22 +1000
Subject: [PATCH 12/39] Add crm cluster join procedure
---
xml/ha_add_nodes.xml | 74 ++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 72 insertions(+), 2 deletions(-)
diff --git a/xml/ha_add_nodes.xml b/xml/ha_add_nodes.xml
index c3fa6035..ffed805a 100644
--- a/xml/ha_add_nodes.xml
+++ b/xml/ha_add_nodes.xml
@@ -25,10 +25,80 @@
-
+
+
Adding nodes with crm cluster join
-
+ You can add more nodes to the cluster with the crm cluster join bootstrap script.
+ The script only needs access to an existing cluster node, and completes the basic setup
+ on the current machine automatically.
+
+
+ For more information, run the crm cluster join --help command.
+
+
+ Adding nodes with crm cluster join
+
+
+ Log in to a node as &rootuser;, or as a user with sudo privileges.
+
+
+
+
+ Start the bootstrap script:
+
+
+
+
+ If you set up the first node as &rootuser;, you can run this command with
+ no additional parameters:
+
+&prompt.root;crm cluster join
+
+
+
+ If you set up the first node as a sudo user, you must
+ specify the user and node with the option:
+
+&prompt.user;sudo crm cluster join -c USER@&node1;
+
+
+
+ If you set up the first node as a sudo user with SSH agent forwarding,
+ use the following command:
+
+&prompt.user;sudo --preserve-env=SSH_AUTH_SOCK crm cluster join --use-ssh-agent -c USER@&node1;
+
+
+
+ If NTP is not configured to start at boot time, a message
+ appears. The script also checks for a hardware watchdog device.
+ You are warned if none is present.
+
+
+
+
+ If you did not already specify &node1;
+ with , you will be prompted for the IP address of the first node.
+
+
+
+
+ If you did not already configure passwordless SSH access between
+ both machines, you will be prompted for the password of the first node.
+
+
+ After logging in to the specified node, the script copies the
+ &corosync; configuration, configures SSH and &csync;,
+ brings the current machine online as a new cluster node, and
+ starts the service needed for &hawk2;.
+
+
+
+
+ Repeat this procedure for each node. You can check the status of the cluster at any time
+ with the crm status command, or by logging in to &hawk2; and navigating to
+ StatusNodes.
From 2ebbeb9a12a57c14a71dc0e5bc1412c4879c09f3 Mon Sep 17 00:00:00 2001
From: Tahlia Richardson <3069029+tahliar@users.noreply.github.com>
Date: Thu, 8 Feb 2024 17:01:14 +1000
Subject: [PATCH 13/39] Add initial crm cluster init section
Will expand to be more detailed
---
xml/ha_bootstrap_install.xml | 268 +++++++++++++++++++++++++++++++++++
1 file changed, 268 insertions(+)
diff --git a/xml/ha_bootstrap_install.xml b/xml/ha_bootstrap_install.xml
index cf362e14..85f8b25c 100644
--- a/xml/ha_bootstrap_install.xml
+++ b/xml/ha_bootstrap_install.xml
@@ -25,4 +25,272 @@
+
+
+ Overview of the crm cluster init script
+
+ The crm cluster init command executes a bootstrap script that defines the
+ basic parameters needed for cluster communication, resulting in a running one-node cluster.
+ The script checks and configures the following components:
+
+
+
+ NTP
+
+
+ Checks if NTP is configured to start at boot time. If not, a message appears.
+
+
+
+
+ SSH
+
+ Creates SSH keys for passwordless login between cluster nodes.
+
+
+
+
+ &csync;
+
+
+ Configures &csync; to replicate configuration files across all nodes
+ in a cluster.
+
+
+
+
+ &corosync;
+
+ Configures the cluster communication system.
+
+
+
+ SBD/watchdog
+
+ Checks if a watchdog exists and asks you whether to configure SBD
+ as node fencing mechanism.
+
+
+
+ Virtual floating IP
+
+ Asks you whether to configure a virtual IP address for cluster
+ administration with &hawk2;.
+
+
+
+ Firewall
+
+ Opens the ports in the firewall that are needed for cluster communication.
+
+
+
+ Cluster name
+
+ Defines a name for the cluster, by default
+ hacluster. This
+ is optional and mostly useful for &geo; clusters. Usually, the cluster
+ name reflects the geographical location and makes it easier to distinguish a site
+ inside a &geo; cluster.
+
+
+
+ &qdevice;/&qnet;
+
+
+ Asks you whether to configure &qdevice;/&qnet; to participate in
+ quorum decisions. We recommend using &qdevice; and &qnet; for clusters
+ with an even number of nodes, and especially for two-node clusters.
+
+
+
+
+
+ &pace; default settings
+
+ The options set by the bootstrap script might not be the same as the &pace;
+ default settings. You can check which settings the bootstrap script changed in
+ /var/log/crmsh/crmsh.log. Any options set during the bootstrap
+ process can be modified later with the &yast; cluster module.
+
+
+
+ Cluster configuration for different platforms
+
+ The crm cluster init script detects the system environment (for example,
+ &ms; Azure) and adjusts certain cluster settings based on the profile for that environment.
+ For more information, see the file /etc/crm/profiles.yml.
+
+
+
+
+
+
+ Setting up the first node with crm cluster init
+
+ Set up the first node with the crm cluster init script.
+ This requires only a minimum of time and manual intervention.
+
+
+ Setting up the first node (&node1;) with
+ crm cluster init
+
+
+ Log in to the first cluster node as &rootuser;, or as a user with
+ sudo privileges.
+
+
+ sudo user SSH key access
+
+ The cluster uses passwordless SSH access for communication between the nodes.
+ The crm cluster init script checks for SSH keys and generates
+ them if they do not already exist.
+
+
+ If you intend to set up the first node as a user with sudo privileges,
+ you must ensure the user's SSH keys exist (or will be generated) locally on the node,
+ not on a remote system.
+
+
+
+
+
+ Start the bootstrap script:
+
+ &prompt.root;crm cluster init --name CLUSTERNAME
+ Replace the CLUSTERNAME
+ placeholder with a meaningful name, like the geographical location of your
+ cluster (for example, &cluster1;).
+ This is especially helpful to create a &geo; cluster later on,
+ as it simplifies the identification of a site.
+
+
+ If you need to use multicast instead of unicast (the default) for your cluster
+ communication, use the option (or ).
+
+
+ The script checks for NTP configuration and a hardware watchdog service.
+ If required, it generates the public and private SSH keys used for SSH access and
+ &csync; synchronization and starts the respective services.
+
+
+
+
+ Configure the cluster communication layer (&corosync;):
+
+
+
+
+ Enter a network address to bind to. By default, the script
+ proposes the network address of eth0.
+ Alternatively, enter a different network address, for example the
+ address of bond0.
+
+
+
+
+ Accept the proposed port (5405) or enter a different one.
+
+
+
+
+
+
+ Set up SBD as the node fencing mechanism:
+
+
+ Confirm with y that you want to use SBD.
+
+
+ Enter a persistent path to the partition of your block device that
+ you want to use for SBD.
+ The path must be consistent across all nodes in the cluster.
+ The script creates a small partition on the device to be used for SBD.
+
+
+
+
+ Configure a virtual IP address for cluster administration with &hawk2;:
+
+
+ Confirm with y that you want to configure a
+ virtual IP address.
+
+ Enter an unused IP address that you want to use as administration IP
+ for &hawk2;: &subnetI;.10
+
+ Instead of logging in to an individual cluster node with &hawk2;,
+ you can connect to the virtual IP address.
+
+
+
+
+
+ Choose whether to configure &qdevice; and &qnet;. For the minimal setup
+ described in this document, decline with n for now.
+
+
+
+
+ Finally, the script will start the cluster services to bring the
+ cluster online and enable &hawk2;. The URL to use for &hawk2; is
+ displayed on the screen.
+
+
+
+
+
+ Logging in to the &hawk2; web interface
+
+ You now have a running one-node cluster. To view its status, proceed as follows:
+
+
+ Logging in to the &hawk2; Web interface
+
+ On any machine, start a Web browser and make sure that JavaScript and
+ cookies are enabled.
+
+
+ As URL, enter the virtual IP address that you configured with the bootstrap script:
+ https://&subnetI;.10:7630/
+
+ Certificate warning
+ If a certificate warning appears when you try to access the URL for
+ the first time, a self-signed certificate is in use. Self-signed
+ certificates are not considered trustworthy by default.
+ Ask your cluster operator for the certificate details to verify the
+ certificate.
+ To proceed anyway, you can add an exception in the browser to bypass
+ the warning.
+
+
+
+ On the &hawk2; login screen, enter the
+ Username and Password of the
+ user that was created by the bootstrap script (user hacluster, password
+ linux).
+
+ Secure password
+ Replace the default password with a secure one as soon as possible:
+
+ &prompt.root;passwd hacluster
+
+
+
+
+ Click Log In. The &hawk2; Web interface
+ shows the Status screen by default:
+
+
+
+
+
From d23a9386fb0fe92cd2827eb7c68d0a1bc098fdaa Mon Sep 17 00:00:00 2001
From: Tahlia Richardson <3069029+tahliar@users.noreply.github.com>
Date: Thu, 15 Feb 2024 14:27:05 +1000
Subject: [PATCH 14/39] Add autoyast note to pattern installation procedure
---
xml/ha_install.xml | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/xml/ha_install.xml b/xml/ha_install.xml
index 5d489538..9321f158 100644
--- a/xml/ha_install.xml
+++ b/xml/ha_install.xml
@@ -65,9 +65,15 @@
- Install the &ha; pattern on all machines that
- will be part of your cluster.
+ Repeat these steps on all machines that will be part of the cluster.
+
+ Cloning nodes with &ay;
+
+ You do not need to repeat these steps if you intend to use &ay; to install the rest of
+ the cluster nodes. The clones will have the same installed packages as the original node.
+
+
From d1ffb86b324002e366c10b0b116eef41c3fdad2d Mon Sep 17 00:00:00 2001
From: Tahlia Richardson <3069029+tahliar@users.noreply.github.com>
Date: Fri, 16 Feb 2024 17:04:38 +1000
Subject: [PATCH 15/39] Started expanding the crm cluster init section
---
xml/ha_bootstrap_install.xml | 217 ++++++++++++++++++++---------------
1 file changed, 124 insertions(+), 93 deletions(-)
diff --git a/xml/ha_bootstrap_install.xml b/xml/ha_bootstrap_install.xml
index 85f8b25c..98499ede 100644
--- a/xml/ha_bootstrap_install.xml
+++ b/xml/ha_bootstrap_install.xml
@@ -37,71 +37,77 @@
NTP
-
- Checks if NTP is configured to start at boot time. If not, a message appears.
-
+
+ Checks if NTP is configured to start at boot time. If not, a message appears.
+ SSH
- Creates SSH keys for passwordless login between cluster nodes.
-
+
+ Detects or generates SSH keys for passwordless login between cluster nodes.
+ &csync;
-
- Configures &csync; to replicate configuration files across all nodes
- in a cluster.
-
+
+ Configures &csync; to replicate configuration files across all nodes in a cluster.
+ &corosync;
- Configures the cluster communication system.
+
+ Configures the cluster communication system.
+ SBD/watchdog
- Checks if a watchdog exists and asks you whether to configure SBD
- as node fencing mechanism.
+
+ Checks if a watchdog exists and asks you whether to configure SBD as the node fencing mechanism.
+ Virtual floating IP
- Asks you whether to configure a virtual IP address for cluster
- administration with &hawk2;.
+
+ Asks you whether to configure a virtual IP address for cluster administration with &hawk2;.
+ Firewall
- Opens the ports in the firewall that are needed for cluster communication.
+
+ Opens the ports in the firewall that are needed for cluster communication.
+ Cluster name
- Defines a name for the cluster, by default
- hacluster. This
- is optional and mostly useful for &geo; clusters. Usually, the cluster
- name reflects the geographical location and makes it easier to distinguish a site
- inside a &geo; cluster.
+
+ Defines a name for the cluster, by default hacluster. This is
+ optional and mostly useful for &geo; clusters. Usually, the cluster name reflects the
+ geographical location and makes it easier to distinguish a site inside a &geo; cluster.
+ &qdevice;/&qnet;
-
- Asks you whether to configure &qdevice;/&qnet; to participate in
- quorum decisions. We recommend using &qdevice; and &qnet; for clusters
- with an even number of nodes, and especially for two-node clusters.
-
+
+ Asks you whether to configure &qdevice;/&qnet; to participate in quorum decisions.
+ We recommend using &qdevice; and &qnet; for clusters with an even number of nodes,
+ and especially for two-node clusters.
+
@@ -128,30 +134,47 @@
Setting up the first node with crm cluster init
- Set up the first node with the crm cluster init script.
- This requires only a minimum of time and manual intervention.
+ Setting up the first node with the crm cluster init script
+ requires only a minimum of time and manual intervention.
+
+
+ The steps in this procedure show the default option followed by alternative or additional
+ options. For a minimal setup with only the default options, see .
- Setting up the first node (&node1;) with
- crm cluster init
+ Setting up the first node with crm cluster init
- Log in to the first cluster node as &rootuser;, or as a user with
- sudo privileges.
-
-
- sudo user SSH key access
-
- The cluster uses passwordless SSH access for communication between the nodes.
- The crm cluster init script checks for SSH keys and generates
- them if they do not already exist.
+ Log in to the first cluster node:
-
- If you intend to set up the first node as a user with sudo privileges,
- you must ensure the user's SSH keys exist (or will be generated) locally on the node,
- not on a remote system.
-
-
+
+
+ Default
+
+
+ Log into the node as the &rootuser; user.
+
+
+
+
+ sudo user (no SSH agent forwarding)
+
+
+ Log into the node as a user with sudo privileges. The user's SSH keys
+ must exist (or be generated) locally on the node, not on a remote system.
+
+
+
+
+ SSH agent forwarding
+
+
+ Log into the node as a user with sudo privileges, using
+ SSH agent forwarding. ++WIP, add more details here.++
+
+
+
+
@@ -169,8 +192,8 @@
communication, use the option (or ).
- The script checks for NTP configuration and a hardware watchdog service.
- If required, it generates the public and private SSH keys used for SSH access and
+ The script checks for NTP configuration and a hardware watchdog service. If required,
+ it generates the public and private SSH keys used for passwordless SSH access and
&csync; synchronization and starts the respective services.
@@ -183,7 +206,7 @@
Enter a network address to bind to. By default, the script
proposes the network address of eth0.
- Alternatively, enter a different network address, for example the
+ Alternatively, enter a different network address, for example, the
address of bond0.
@@ -245,52 +268,60 @@
You now have a running one-node cluster. To view its status, proceed as follows:
- Logging in to the &hawk2; Web interface
-
- On any machine, start a Web browser and make sure that JavaScript and
- cookies are enabled.
-
-
- As URL, enter the virtual IP address that you configured with the bootstrap script:
- https://&subnetI;.10:7630/
-
- Certificate warning
- If a certificate warning appears when you try to access the URL for
- the first time, a self-signed certificate is in use. Self-signed
- certificates are not considered trustworthy by default.
- Ask your cluster operator for the certificate details to verify the
- certificate.
- To proceed anyway, you can add an exception in the browser to bypass
- the warning.
-
-
-
- On the &hawk2; login screen, enter the
- Username and Password of the
- user that was created by the bootstrap script (user hacluster, password
- linux).
-
- Secure password
- Replace the default password with a secure one as soon as possible:
-
- &prompt.root;passwd hacluster
-
-
-
-
- Click Log In. The &hawk2; Web interface
- shows the Status screen by default:
-
-
-
+ Logging in to the &hawk2; Web interface
+
+
+ On any machine, start a Web browser and make sure that JavaScript and cookies are enabled.
+
+
+
+
+ As URL, enter the virtual IP address that you configured with the bootstrap script:
+
+https://VIRTUAL_IP:7630/
+
+ Certificate warning
+
+ If a certificate warning appears when you try to access the URL for the first time,
+ a self-signed certificate is in use. Self-signed certificates are not considered
+ trustworthy by default.
+
+
+ Ask your cluster operator for the certificate details to verify the certificate.
+
+
+ To proceed anyway, you can add an exception in the browser to bypass the warning.
+
+
+
+
+
+ On the &hawk2; login screen, enter the Username and
+ Password of the user that was created by the bootstrap script
+ (user hacluster, password linux).
+
+
+ Secure password
+
+ Replace the default password with a secure one as soon as possible:
+
+&prompt.root;passwd hacluster
+
+
+
+
+ Click Log In. The &hawk2; Web interface shows the
+ Status screen by default:
+
+
+
From bee8575b4f395d4ded47b461aa88f98600c0fc21 Mon Sep 17 00:00:00 2001
From: Tahlia Richardson <3069029+tahliar@users.noreply.github.com>
Date: Wed, 24 Apr 2024 16:19:35 +1000
Subject: [PATCH 16/39] Add new metadata from PR#371
---
xml/book_full_install.xml | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/xml/book_full_install.xml b/xml/book_full_install.xml
index 63acf620..0a18d281 100644
--- a/xml/book_full_install.xml
+++ b/xml/book_full_install.xml
@@ -12,8 +12,9 @@
-
@@ -35,6 +36,12 @@
yes
+
+ Installation
+ Administration
+ Clustering
+
+ Product Documentation
From 6cfa3e756b03addac067de44017ccab21d5ff482 Mon Sep 17 00:00:00 2001
From: Tahlia Richardson <3069029+tahliar@users.noreply.github.com>
Date: Wed, 29 May 2024 14:20:13 +1000
Subject: [PATCH 17/39] Fix command prompts in Logging In
---
xml/ha_config_cli.xml | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/xml/ha_config_cli.xml b/xml/ha_config_cli.xml
index ffe0550d..86caad77 100644
--- a/xml/ha_config_cli.xml
+++ b/xml/ha_config_cli.xml
@@ -125,13 +125,13 @@
Log in to the first cluster node as a user with sudo privileges,
using the option to enable SSH agent forwarding:
-user@local > ssh -A USER@NODE1
+user@local> ssh -A USER@NODE1
Initialize the cluster with the crm cluster init script:
-user@node1 > sudo --preserve-env=SSH_AUTH_SOCK \
+user@node1> sudo --preserve-env=SSH_AUTH_SOCK \crm cluster init --use-ssh-agent
@@ -159,7 +159,7 @@
Use the -c option to specify the user and node that initialized
the cluster:
-user@node2 > sudo --preserve-env=SSH_AUTH_SOCK \
+user@node2> sudo --preserve-env=SSH_AUTH_SOCK \
crm cluster join --use-ssh-agent -c USER@NODE1
@@ -172,12 +172,12 @@ crm cluster join --use-ssh-agent -c USER@NODE1
Run the following command on the first node:
-user@node1 > sudo --preserve-env=SSH_AUTH_SOCK \
+user@node1> sudo --preserve-env=SSH_AUTH_SOCK \
crm cluster init ssh --use-ssh-agent
Run the following command on all other nodes:
-user@node2 > sudo --preserve-env=SSH_AUTH_SOCK \
+user@node2> sudo --preserve-env=SSH_AUTH_SOCK \
crm cluster join ssh --use-ssh-agent -c USER@NODE1
From c85cb1ad6c675f326b4d59ecfdde20c1444ac777 Mon Sep 17 00:00:00 2001
From: Tahlia Richardson <3069029+tahliar@users.noreply.github.com>
Date: Wed, 29 May 2024 15:22:27 +1000
Subject: [PATCH 18/39] Change admin guide authentication section title
---
xml/ha_config_cli.xml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/xml/ha_config_cli.xml b/xml/ha_config_cli.xml
index 86caad77..f396b815 100644
--- a/xml/ha_config_cli.xml
+++ b/xml/ha_config_cli.xml
@@ -61,7 +61,7 @@
- Logging in
+ User privileges and authentication
Managing a cluster requires sufficient privileges. The following users can run the
crm command and its subcommands:
From a8dab519614d0eb9da56c6a4505d2b89f4c434e8 Mon Sep 17 00:00:00 2001
From: Tahlia Richardson <3069029+tahliar@users.noreply.github.com>
Date: Thu, 30 May 2024 15:47:31 +1000
Subject: [PATCH 19/39] Move log in steps to a new section
---
xml/book_full_install.xml | 1 +
xml/ha_bootstrap_install.xml | 33 -----------
xml/ha_install.xml | 4 +-
xml/ha_log_in.xml | 109 +++++++++++++++++++++++++++++++++++
4 files changed, 112 insertions(+), 35 deletions(-)
create mode 100644 xml/ha_log_in.xml
diff --git a/xml/book_full_install.xml b/xml/book_full_install.xml
index 0a18d281..f7c2fb46 100644
--- a/xml/book_full_install.xml
+++ b/xml/book_full_install.xml
@@ -62,6 +62,7 @@
Installing cluster nodes
+
diff --git a/xml/ha_bootstrap_install.xml b/xml/ha_bootstrap_install.xml
index 98499ede..770b3faf 100644
--- a/xml/ha_bootstrap_install.xml
+++ b/xml/ha_bootstrap_install.xml
@@ -143,39 +143,6 @@
Setting up the first node with crm cluster init
-
-
- Log in to the first cluster node:
-
-
-
- Default
-
-
- Log into the node as the &rootuser; user.
-
-
-
-
- sudo user (no SSH agent forwarding)
-
-
- Log into the node as a user with sudo privileges. The user's SSH keys
- must exist (or be generated) locally on the node, not on a remote system.
-
-
-
-
- SSH agent forwarding
-
-
- Log into the node as a user with sudo privileges, using
- SSH agent forwarding. ++WIP, add more details here.++
-
-
-
-
-
Start the bootstrap script:
diff --git a/xml/ha_install.xml b/xml/ha_install.xml
index 9321f158..6429c9db 100644
--- a/xml/ha_install.xml
+++ b/xml/ha_install.xml
@@ -56,12 +56,12 @@
Enable the &ha; extension:
-&prompt.user;sudo SUSEConnect -p sle-ha/&product-ga;.&product-sp;/x86_64 -r ADDITIONAL_REGCODE
+&prompt.root;SUSEConnect -p sle-ha/&product-ga;.&product-sp;/x86_64 -r ADDITIONAL_REGCODE
Install the &ha; pattern:
-&prompt.user;sudo zypper install -t pattern ha_sles
+&prompt.root;zypper install -t pattern ha_sles
diff --git a/xml/ha_log_in.xml b/xml/ha_log_in.xml
new file mode 100644
index 00000000..914ef49f
--- /dev/null
+++ b/xml/ha_log_in.xml
@@ -0,0 +1,109 @@
+
+
+
+ %entities;
+]>
+
+
+ Logging in to the cluster nodes
+
+
+
+ &sleha; clusters use passwordless SSH access for communication between the nodes.
+ If you set up the cluster with crm cluster init, the script checks
+ for SSH keys and generates them if they do not exist. If you set up the cluster
+ with the YaST cluster module, you must configure the SSH keys yourself.
+
+
+ By default, the cluster performs operations as the &rootuser; user. However, if you cannot
+ allow passwordless root SSH access, you can set up the cluster as a user with
+ sudo privileges instead.
+
+
+
+
+ yes
+
+
+
+
+ The following users can set up the cluster on the first node, and add more nodes to the cluster:
+
+
+
+ The &rootuser; user
+
+
+ Setting up and running the cluster as &rootuser; is &pace;'s default and does not
+ require any additional configuration. The &rootuser; user's SSH keys must exist
+ (or be generated) locally on the node, not on a remote system.
+
+
+ To log into to the first cluster node as the &rootuser; user, run the following command:
+
+user@local> ssh root@NODE1
+
+
+
+ A user with sudo privileges (without SSH agent forwarding)
+
+
+ You will need to specify this user when you add more nodes to the cluster with
+ crm cluster join. The user's SSH keys must exist (or be generated)
+ locally on the node, not on a remote system.
+
+
+ To log into to the first cluster node as a sudo user, run the
+ following command:
+
+user@local> ssh USER@NODE1
+
+
+
+ A user with sudo privileges (with SSH agent forwarding)
+
+
+ You can use SSH agent forwarding to pass your local SSH keys to the cluster nodes.
+ This can be useful if you need to avoid storing SSH keys on the nodes, but requires
+ additional configuration on your local machine and on the cluster nodes.
+
+
+ To log in to the first cluster node with SSH agent forwarding enabled,
+ perform the following steps:
+
+
+
+
+ On your local machine, start the SSH agent and add your keys to it. For more information,
+ see
+ Automated public key logins with ssh-agent in
+ &secguide; for &sles;.
+
+
+
+
+ Log in to the first node with the option to enable
+ SSH agent forwarding:
+
+user@local> ssh -A USER@NODE1
+
+
+
+
+
+
+ When you add nodes to the cluster, you must log in to each node as the same user you set up the first node with.
+
+
+
+ For simplicity, the commands in this guide assume you are logged in as the &rootuser; user. If you logged in as a sudo user, adjust the commands accordingly.
+
+
+
From dc14179e816a8879371a1df40fd4448a5bdf1a7b Mon Sep 17 00:00:00 2001
From: Tahlia Richardson <3069029+tahliar@users.noreply.github.com>
Date: Mon, 3 Jun 2024 14:51:08 +1000
Subject: [PATCH 20/39] Move crm cluster join to bootstrap chapter
---
xml/book_full_install.xml | 2 +-
xml/ha_add_nodes.xml | 240 -----------------------------------
xml/ha_autoyast_deploy.xml | 148 +++++++++++++++++++++
xml/ha_bootstrap_install.xml | 72 +++++++++++
xml/ha_log_in.xml | 4 +-
5 files changed, 223 insertions(+), 243 deletions(-)
delete mode 100644 xml/ha_add_nodes.xml
create mode 100644 xml/ha_autoyast_deploy.xml
diff --git a/xml/book_full_install.xml b/xml/book_full_install.xml
index f7c2fb46..3b0e9278 100644
--- a/xml/book_full_install.xml
+++ b/xml/book_full_install.xml
@@ -67,7 +67,7 @@
-
+
diff --git a/xml/ha_add_nodes.xml b/xml/ha_add_nodes.xml
deleted file mode 100644
index ffed805a..00000000
--- a/xml/ha_add_nodes.xml
+++ /dev/null
@@ -1,240 +0,0 @@
-
-
-
- %entities;
-]>
-
-
- Adding more nodes
-
-
-
-
-
-
-
-
- yes
-
-
-
-
-
- Adding nodes with crm cluster join
-
- You can add more nodes to the cluster with the crm cluster join bootstrap script.
- The script only needs access to an existing cluster node, and completes the basic setup
- on the current machine automatically.
-
-
- For more information, run the crm cluster join --help command.
-
-
- Adding nodes with crm cluster join
-
-
- Log in to a node as &rootuser;, or as a user with sudo privileges.
-
-
-
-
- Start the bootstrap script:
-
-
-
-
- If you set up the first node as &rootuser;, you can run this command with
- no additional parameters:
-
-&prompt.root;crm cluster join
-
-
-
- If you set up the first node as a sudo user, you must
- specify the user and node with the option:
-
-&prompt.user;sudo crm cluster join -c USER@&node1;
-
-
-
- If you set up the first node as a sudo user with SSH agent forwarding,
- use the following command:
-
-&prompt.user;sudo --preserve-env=SSH_AUTH_SOCK crm cluster join --use-ssh-agent -c USER@&node1;
-
-
-
- If NTP is not configured to start at boot time, a message
- appears. The script also checks for a hardware watchdog device.
- You are warned if none is present.
-
-
-
-
- If you did not already specify &node1;
- with , you will be prompted for the IP address of the first node.
-
-
-
-
- If you did not already configure passwordless SSH access between
- both machines, you will be prompted for the password of the first node.
-
-
- After logging in to the specified node, the script copies the
- &corosync; configuration, configures SSH and &csync;,
- brings the current machine online as a new cluster node, and
- starts the service needed for &hawk2;.
-
-
-
-
- Repeat this procedure for each node. You can check the status of the cluster at any time
- with the crm status command, or by logging in to &hawk2; and navigating to
- StatusNodes.
-
-
-
-
- Adding nodes manually
-
-
-
-
-
-
- Adding nodes with &ay;
-
-
- After you have installed and set up a two-node cluster, you can extend the
- cluster by cloning existing nodes with &ay; and adding the clones to the cluster.
-
-
- &ay; uses profiles that contains installation and configuration data.
- A profile tells &ay; what to install and how to configure the installed system to
- get a ready-to-use system in the end. This profile can then be used
- for mass deployment in different ways (for example, to clone existing
- cluster nodes).
-
-
- For detailed instructions on how to use &ay; in various scenarios,
- see the
- &ayguide; for &sls; &productnumber;.
-
-
-
- Identical hardware
-
- assumes you are rolling
- out &productname; &productnumber; to a set of machines with identical hardware
- configurations.
-
-
- If you need to deploy cluster nodes on non-identical hardware, refer to the
- &deploy; for &sls; &productnumber;,
- chapter Automated Installation, section
- Rule-Based Autoinstallation.
-
-
-
-
- Cloning a cluster node with &ay;
-
-
- Make sure the node you want to clone is correctly installed and
- configured. For details, see the &haquick; or
- .
-
-
-
-
- Follow the description outlined in the &sle;
- &productnumber; &deploy; for simple mass
- installation. This includes the following basic steps:
-
-
-
-
- Creating an &ay; profile. Use the &ay; GUI to create and modify
- a profile based on the existing system configuration. In &ay;,
- choose the &ha; module and click the
- Clone button. If needed, adjust the configuration
- in the other modules and save the resulting control file as XML.
-
-
- If you have configured DRBD, you can select and clone this module in
- the &ay; GUI, too.
-
-
-
-
- Determining the source of the &ay; profile and the parameter to
- pass to the installation routines for the other nodes.
-
-
-
-
- Determining the source of the &sls; and &productname;
- installation data.
-
-
-
-
- Determining and setting up the boot scenario for autoinstallation.
-
-
-
-
- Passing the command line to the installation routines, either by
- adding the parameters manually or by creating an
- info file.
-
-
-
-
- Starting and monitoring the autoinstallation process.
-
-
-
-
-
-
-
- After the clone has been successfully installed, execute the following
- steps to make the cloned node join the cluster:
-
-
-
- Bringing the cloned node online
-
-
- Transfer the key configuration files from the already configured nodes
- to the cloned node with &csync; as described in
- .
-
-
-
-
- To bring the node online, start the cluster services on the cloned
- node as described in .
-
-
-
-
-
- The cloned node now joins the cluster because the
- /etc/corosync/corosync.conf file has been applied to
- the cloned node via &csync;. The CIB is automatically synchronized
- among the cluster nodes.
-
-
-
-
diff --git a/xml/ha_autoyast_deploy.xml b/xml/ha_autoyast_deploy.xml
new file mode 100644
index 00000000..4cb8732e
--- /dev/null
+++ b/xml/ha_autoyast_deploy.xml
@@ -0,0 +1,148 @@
+
+
+
+ %entities;
+]>
+
+
+
+ Deploying nodes with &ay;
+
+
+
+ After you have installed and set up a two-node cluster, you can extend the
+ cluster by cloning existing nodes with &ay; and adding the clones to the cluster.
+
+ &ay; uses profiles that contain installation and configuration data.
+ A profile tells &ay; what to install and how to configure the installed system to
+ get a ready-to-use system in the end. This profile can then be used
+ for mass deployment in different ways (for example, to clone existing cluster nodes).
+
+
+ For detailed instructions on how to use &ay; in various scenarios, see the
+
+ &ayguide; for &sls; &productnumber;.
+
+
+
+
+ yes
+
+
+
+
+
+
+ Identical hardware
+
+ assumes you are rolling
+ out &productname; &productnumber; to a set of machines with identical hardware
+ configurations.
+
+
+ If you need to deploy cluster nodes on non-identical hardware, refer to the
+ &deploy; for &sls; &productnumber;,
+ chapter Automated Installation, section
+ Rule-Based Autoinstallation.
+
+
+
+
+ Cloning a cluster node with &ay;
+
+
+ Make sure the node you want to clone is correctly installed and
+ configured. For details, see the &haquick; or
+ .
+
+
+
+
+ Follow the description outlined in the &sle;
+ &productnumber; &deploy; for simple mass
+ installation. This includes the following basic steps:
+
+
+
+
+ Creating an &ay; profile. Use the &ay; GUI to create and modify
+ a profile based on the existing system configuration. In &ay;,
+ choose the &ha; module and click the
+ Clone button. If needed, adjust the configuration
+ in the other modules and save the resulting control file as XML.
+
+
+ If you have configured DRBD, you can select and clone this module in
+ the &ay; GUI, too.
+
+
+
+
+ Determining the source of the &ay; profile and the parameter to
+ pass to the installation routines for the other nodes.
+
+
+
+
+ Determining the source of the &sls; and &productname;
+ installation data.
+
+
+
+
+ Determining and setting up the boot scenario for autoinstallation.
+
+
+
+
+ Passing the command line to the installation routines, either by
+ adding the parameters manually or by creating an
+ info file.
+
+
+
+
+ Starting and monitoring the autoinstallation process.
+
+
+
+
+
+
+
+ After the clone has been successfully installed, execute the following
+ steps to make the cloned node join the cluster:
+
+
+
+ Bringing the cloned node online
+
+
+ Transfer the key configuration files from the already configured nodes
+ to the cloned node with &csync; as described in
+ .
+
+
+
+
+ To bring the node online, start the cluster services on the cloned
+ node as described in .
+
+
+
+
+
+ The cloned node now joins the cluster because the
+ /etc/corosync/corosync.conf file has been applied to
+ the cloned node via &csync;. The CIB is automatically synchronized
+ among the cluster nodes.
+
+
+
diff --git a/xml/ha_bootstrap_install.xml b/xml/ha_bootstrap_install.xml
index 770b3faf..9247ca5b 100644
--- a/xml/ha_bootstrap_install.xml
+++ b/xml/ha_bootstrap_install.xml
@@ -291,4 +291,76 @@
+
+
+
+ Adding nodes with crm cluster join
+
+ You can add more nodes to the cluster with the crm cluster join bootstrap script.
+ The script only needs access to an existing cluster node, and completes the basic setup
+ on the current machine automatically.
+
+
+ For more information, run the crm cluster join --help command.
+
+
+ Adding nodes with crm cluster join
+
+
+ Start the bootstrap script:
+
+
+
+
+ If you set up the first node as &rootuser;, you can run this command with
+ no additional parameters:
+
+&prompt.root;crm cluster join
+
+
+
+ If you set up the first node as a sudo user, you must
+ specify the user and node with the option:
+
+&prompt.user;sudo crm cluster join -c USER@&node1;
+
+
+
+ If you set up the first node as a sudo user with SSH agent forwarding,
+ use the following command:
+
+&prompt.user;sudo --preserve-env=SSH_AUTH_SOCK crm cluster join --use-ssh-agent -c USER@&node1;
+
+
+
+ If NTP is not configured to start at boot time, a message
+ appears. The script also checks for a hardware watchdog device.
+ You are warned if none is present.
+
+
+
+
+ If you did not already specify the first cluster node
+ with , you will be prompted for its IP address.
+
+
+
+
+ If you did not already configure passwordless SSH access between the cluster nodes,
+ you will be prompted for the password of the first node.
+
+
+ After logging in to the specified node, the script copies the
+ &corosync; configuration, configures SSH and &csync;,
+ brings the current machine online as a new cluster node, and
+ starts the service needed for &hawk2;.
+
+
+
+
+ Repeat this procedure for each node. You can check the status of the cluster at any time
+ with the crm status command, or by logging in to &hawk2; and navigating to
+ StatusNodes.
+
+
diff --git a/xml/ha_log_in.xml b/xml/ha_log_in.xml
index 914ef49f..29e89fc9 100644
--- a/xml/ha_log_in.xml
+++ b/xml/ha_log_in.xml
@@ -46,7 +46,7 @@
(or be generated) locally on the node, not on a remote system.
- To log into to the first cluster node as the &rootuser; user, run the following command:
+ To log in to the first cluster node as the &rootuser; user, run the following command:
user@local> ssh root@NODE1
@@ -60,7 +60,7 @@
locally on the node, not on a remote system.
- To log into to the first cluster node as a sudo user, run the
+ To log in to the first cluster node as a sudo user, run the
following command:
user@local> ssh USER@NODE1
From 8af6a494afe77a13ed6ac16f10e352d72b20cb65 Mon Sep 17 00:00:00 2001
From: Tahlia Richardson <3069029+tahliar@users.noreply.github.com>
Date: Tue, 4 Jun 2024 15:14:22 +1000
Subject: [PATCH 21/39] Split csync section to flow better in yast chapter
---
xml/ha_yast_cluster.xml | 267 +++++++++++++++++++---------------------
1 file changed, 127 insertions(+), 140 deletions(-)
diff --git a/xml/ha_yast_cluster.xml b/xml/ha_yast_cluster.xml
index 2e2b3688..765be4f7 100644
--- a/xml/ha_yast_cluster.xml
+++ b/xml/ha_yast_cluster.xml
@@ -641,6 +641,115 @@
+
+ Configuring &csync; to synchronize files
+
+ Instead of copying the configuration files to all nodes
+ manually, use the csync2 tool for replication across
+ all nodes in the cluster. &csync; helps you to keep track of configuration changes
+ and to keep files synchronized across the cluster nodes:
+
+
+
+
+ You can define a list of files that are important for operation.
+
+
+
+
+ You can show changes to these files (against the other cluster nodes).
+
+
+
+
+ You can synchronize the configured files with a single command.
+
+
+
+
+ With a simple shell script in ~/.bash_logout, you
+ can be reminded about unsynchronized changes before logging out of the
+ system.
+
+
+
+
+ Find detailed information about &csync; at
+ and
+ .
+
+
+ Pushing synchronization after any changes
+
+ &csync; only pushes changes. It does not continuously
+ synchronize files between the machines. Each time you update files that need
+ to be synchronized, you need to push the changes to the other machines.
+ Using csync2 to push changes is described later, after
+ the cluster configuration with &yast; is complete.
+
+
+
+ Configuring &csync; with &yast;
+
+ Start the &yast; cluster module and switch to the
+ &csync; category.
+
+
+ To specify the synchronization group, click Add
+ in the Sync Host group and enter the local host names
+ of all nodes in your cluster. For each node, you must use exactly the
+ strings that are returned by the hostname command.
+
+ Host name resolution
+ If host name resolution does not work properly in your
+ network, you can also specify a combination of host name and IP address
+ for each cluster node. To do so, use the string
+ HOSTNAME@IP such as
+ &node1;@&wsIip;, for example. &csync;
+ then uses the IP addresses when connecting.
+
+
+
+ Click Generate Pre-Shared-Keys to create a key
+ file for the synchronization group. The key file is written to
+ /etc/csync2/key_hagroup. After it has been created,
+ it must be copied manually to all members of the cluster.
+
+
+ To populate the Sync File list with the files
+ that usually need to be synchronized among all nodes, click Add
+ Suggested Files.
+
+
+ To Edit, Add or
+ Remove files from the list of files to be synchronized,
+ use the respective buttons. You must enter the absolute path for each
+ file.
+
+
+ Activate &csync; by clicking Turn &csync;
+ ON. This executes the following command to start
+ &csync; automatically at boot time:
+ &prompt.root;systemctl enable csync2.socket
+
+
+ Click Finish. &yast; writes the &csync;
+ configuration to /etc/csync2/csync2.cfg.
+
+
+
+
+
Synchronizing connection status between cluster nodes
@@ -782,138 +891,22 @@
-
+ Transferring the configuration to all nodes
- Instead of copying the resulting configuration files to all nodes
- manually, use the csync2 tool for replication across
- all nodes in the cluster.
-
-
- This requires the following basic steps:
-
-
-
-
- .
-
-
-
-
- .
-
-
-
-
- &csync; helps you to keep track of configuration changes and to keep
- files synchronized across the cluster nodes:
+ After the cluster configuration with &yast; is complete, use csync2
+ to copy the configuration files to the rest of the cluster nodes. To receive the files,
+ nodes must be included in the Sync Host group you configured in
+ .
-
-
-
- You can define a list of files that are important for operation.
-
-
-
-
- You can show changes to these files (against the other cluster nodes).
-
-
-
-
- You can synchronize the configured files with a single command.
-
-
-
-
- With a simple shell script in ~/.bash_logout, you
- can be reminded about unsynchronized changes before logging out of the
- system.
-
-
-
-
- Find detailed information about &csync; at
- and
- .
-
-
-
- Configuring &csync; with &yast;
-
- Configuring &csync; with &yast;
-
- Start the &yast; cluster module and switch to the
- &csync; category.
-
-
- To specify the synchronization group, click Add
- in the Sync Host group and enter the local host names
- of all nodes in your cluster. For each node, you must use exactly the
- strings that are returned by the hostname command.
-
-
- Host name resolution
- If host name resolution does not work properly in your
- network, you can also specify a combination of host name and IP address
- for each cluster node. To do so, use the string
- HOSTNAME@IP such as
- &node1;@&wsIip;, for example. &csync;
- then uses the IP addresses when connecting.
-
-
-
- Click Generate Pre-Shared-Keys to create a key
- file for the synchronization group. The key file is written to
- /etc/csync2/key_hagroup. After it has been created,
- it must be copied manually to all members of the cluster.
-
-
- To populate the Sync File list with the files
- that usually need to be synchronized among all nodes, click Add
- Suggested Files.
-
-
- To Edit, Add or
- Remove files from the list of files to be synchronized
- use the respective buttons. You must enter the absolute path for each
- file.
-
-
- Activate &csync; by clicking Turn &csync;
- ON. This executes the following command to start
- &csync; automatically at boot time:
- &prompt.root;systemctl enable csync2.socket
-
-
- Click Finish. &yast; writes the &csync;
- configuration to /etc/csync2/csync2.cfg.
-
-
-
-
-
-
- Synchronizing changes with &csync; Before running &csync; for the first time, you need to make the
following preparations:
-
Preparing for initial synchronization with &csync;
- Copy the file /etc/csync2/csync2.cfg
- manually to all nodes after you have configured it as described in .
+
+ Copy the file /etc/csync2/csync2.cfg manually to all nodes.
+ Copy the file /etc/csync2/key_hagroup that you
@@ -925,24 +918,19 @@
regenerate the file on the other nodes—it needs to be the same
file on all nodes.
-
-
- Execute the following command on all nodes to start the service now:
+ Run the following command on all nodes to start the service now: &prompt.root;systemctl start csync2.socket
+
+ Use the following procedure to transfer the configuration files to all cluster nodes:
+
- Synchronizing the configuration files with &csync;
-
+ Synchronizing changes with &csync;
- To initially synchronize all files once, execute the following
+ To synchronize all files once, run the following
command on the machine that you want to copy the configuration
from: &prompt.root;csync2 -xv
@@ -966,17 +954,16 @@ Finished with 1 errors.
For more information on the &csync; options, run&prompt.root;csync2 -help
-
+ Pushing synchronization after any changes &csync; only pushes changes. It does not
continuously synchronize files between the machines. Each time you update files that need to be synchronized, you need to
- push the changes to the other machines by running csync2
- on the machine where you did the changes. If you run
+ push the changes to the other machines by running csync2 -xv
+ on the machine where you did the changes. If you run
the command on any of the other machines with unchanged files, nothing
happens.
-
-
+
From c4b3d15f03ce948fda1dc146d42072e4f885a74e Mon Sep 17 00:00:00 2001
From: Tahlia Richardson <3069029+tahliar@users.noreply.github.com>
Date: Fri, 12 Jul 2024 13:55:54 +1000
Subject: [PATCH 22/39] Update to latest metadata
---
xml/book_full_install.xml | 100 ---------------------------
xml/html/rh-book-administration.html | 85 +++++++++++++++++++++++
2 files changed, 85 insertions(+), 100 deletions(-)
delete mode 100644 xml/book_full_install.xml
create mode 100644 xml/html/rh-book-administration.html
diff --git a/xml/book_full_install.xml b/xml/book_full_install.xml
deleted file mode 100644
index 3b0e9278..00000000
--- a/xml/book_full_install.xml
+++ /dev/null
@@ -1,100 +0,0 @@
-
-
-
- %entities;
-]>
-
-
-
-
-
-
-
- Installing High Availability clusters for critical workloads
- &productname;
- &productnameshort;
- &productnumber;
-
-
-
-
-
-
- TBD
-
-
-
-
- yes
-
-
- Installation
- Administration
- Clustering
-
- Product Documentation
-
-
-
-
-
-
-
- Planning for deployment
-
-
-
-
-
-
-
-
-
-
- Installing cluster nodes
-
-
-
-
-
-
-
-
-
-
-
-
-
- Additional configuration
-
-
-
-
-
-
-
-
- Testing the setup
-
-
-
-
-
-
-
-
-
-
diff --git a/xml/html/rh-book-administration.html b/xml/html/rh-book-administration.html
new file mode 100644
index 00000000..47bbfc19
--- /dev/null
+++ b/xml/html/rh-book-administration.html
@@ -0,0 +1,85 @@
+
+Revision History: Administration Guide
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Revision History: Administration Guide
2024-06-26
+
+ Updated for the initial release of SUSE Linux Enterprise High Availability 15 SP6.
+
+
From 25ecc0971e093b45f67950973db17c44babc3982 Mon Sep 17 00:00:00 2001
From: Tahlia Richardson <3069029+tahliar@users.noreply.github.com>
Date: Fri, 12 Jul 2024 14:00:37 +1000
Subject: [PATCH 23/39] Change title
---
DC-SLE-HA-full-install => DC-SLE-HA-deployment | 2 +-
xml/MAIN.SLEHA.xml | 4 ++--
2 files changed, 3 insertions(+), 3 deletions(-)
rename DC-SLE-HA-full-install => DC-SLE-HA-deployment (96%)
diff --git a/DC-SLE-HA-full-install b/DC-SLE-HA-deployment
similarity index 96%
rename from DC-SLE-HA-full-install
rename to DC-SLE-HA-deployment
index a8aa8ad6..0f0b4c94 100644
--- a/DC-SLE-HA-full-install
+++ b/DC-SLE-HA-deployment
@@ -5,7 +5,7 @@
##
## Basics
MAIN="MAIN.SLEHA.xml"
-ROOTID=book-full-install
+ROOTID=book-deployment
## Profiling
PROFOS="sles"
diff --git a/xml/MAIN.SLEHA.xml b/xml/MAIN.SLEHA.xml
index b9b7b334..93b07e89 100644
--- a/xml/MAIN.SLEHA.xml
+++ b/xml/MAIN.SLEHA.xml
@@ -42,8 +42,8 @@
-
-
+
+
From cd20db32c54aefa279269fb45e9302eb9b5ff6c1 Mon Sep 17 00:00:00 2001
From: Tahlia Richardson <3069029+tahliar@users.noreply.github.com>
Date: Wed, 28 Aug 2024 14:57:03 +1000
Subject: [PATCH 24/39] Typo
---
xml/ha_bootstrap_install.xml | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/xml/ha_bootstrap_install.xml b/xml/ha_bootstrap_install.xml
index 9247ca5b..414391e8 100644
--- a/xml/ha_bootstrap_install.xml
+++ b/xml/ha_bootstrap_install.xml
@@ -138,8 +138,9 @@
requires only a minimum of time and manual intervention.
- This steps in this procedure show the default option followed by alternative or additional
- options. For a minimal setup with only the default options, see .
+ The steps in this procedure show the default option followed by alternative or additional
+ options. For a minimal setup with only the default options,
+ see .
Setting up the first node with crm cluster init
From a795331bfde98e08d7bd44f21d593b5def729b61 Mon Sep 17 00:00:00 2001
From: Tahlia Richardson <3069029+tahliar@users.noreply.github.com>
Date: Wed, 28 Aug 2024 15:07:04 +1000
Subject: [PATCH 25/39] alice -> node1
---
xml/ha_bootstrap_install.xml | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/xml/ha_bootstrap_install.xml b/xml/ha_bootstrap_install.xml
index 414391e8..4af11651 100644
--- a/xml/ha_bootstrap_install.xml
+++ b/xml/ha_bootstrap_install.xml
@@ -323,14 +323,15 @@
If you set up the first node as a sudo user, you must
specify the user and node with the option:
-&prompt.user;sudo crm cluster join -c USER@&node1;
+&prompt.user;sudo crm cluster join -c USER@NODE1
If you set up the first node as a sudo user with SSH agent forwarding,
use the following command:
-&prompt.user;sudo --preserve-env=SSH_AUTH_SOCK crm cluster join --use-ssh-agent -c USER@&node1;
+&prompt.user;sudo --preserve-env=SSH_AUTH_SOCK \
+crm cluster join --use-ssh-agent -c USER@NODE1
From b232248e4f3178e2f49a15f42e0bcb6a63419068 Mon Sep 17 00:00:00 2001
From: Tahlia Richardson <3069029+tahliar@users.noreply.github.com>
Date: Thu, 29 Aug 2024 10:20:30 +1000
Subject: [PATCH 26/39] Clarify node login
---
xml/ha_bootstrap_install.xml | 13 ++++++++++++-
xml/ha_log_in.xml | 23 +++++++++++------------
2 files changed, 23 insertions(+), 13 deletions(-)
diff --git a/xml/ha_bootstrap_install.xml b/xml/ha_bootstrap_install.xml
index 4af11651..10e6db8c 100644
--- a/xml/ha_bootstrap_install.xml
+++ b/xml/ha_bootstrap_install.xml
@@ -144,11 +144,17 @@
Setting up the first node with crm cluster init
+
+
+ Log in to the first cluster node as &rootuser;, or as a user with sudo
+ privileges.
+
+
Start the bootstrap script:
- &prompt.root;crm cluster init --name CLUSTERNAME
+ &prompt.root;crm cluster initReplace the CLUSTERNAME
placeholder with a meaningful name, like the geographical location of your
cluster (for example, &cluster1;).
@@ -306,6 +312,11 @@
Adding nodes with crm cluster join
+
+
+ Log in to this node as the same user you set up the first node with.
+
+
Start the bootstrap script:
diff --git a/xml/ha_log_in.xml b/xml/ha_log_in.xml
index 29e89fc9..fdc04895 100644
--- a/xml/ha_log_in.xml
+++ b/xml/ha_log_in.xml
@@ -46,9 +46,9 @@
(or be generated) locally on the node, not on a remote system.
- To log in to the first cluster node as the &rootuser; user, run the following command:
+ To log in to a node as the &rootuser; user, run the following command:
-user@local> ssh root@NODE1
+user@local> ssh root@NODE
@@ -60,10 +60,9 @@
locally on the node, not on a remote system.
- To log in to the first cluster node as a sudo user, run the
- following command:
+ To log in to a node as a sudo user, run the following command:
-user@local> ssh USER@NODE1
+user@local> ssh USER@NODE
@@ -75,8 +74,7 @@
additional configuration on your local machine and on the cluster nodes.
- To log in to the first cluster node with SSH agent forwarding enabled,
- perform the following steps:
+ To log in to a node with SSH agent forwarding enabled, perform the following steps:
@@ -89,21 +87,22 @@
- Log in to the first node with the option to enable
- SSH agent forwarding:
+ Log in to the node with the option to enable SSH agent forwarding:
-user@local> ssh -A USER@NODE1
+user@local> ssh -A USER@NODE
- When you add nodes to the cluster, you must log in to each node as the same user you set up the first node with.
+ When you add nodes to the cluster, you must log in to each node as the same user you set
+ up the first node with.
- For simplicity, the commands in this guide assume you are logged in as the &rootuser; user. If you logged in as a sudo user, adjust the commands accordingly.
+ For simplicity, the commands in this guide assume you are logged in as the &rootuser; user.
+ If you logged in as a sudo user, adjust the commands accordingly.
From 6a703bcd061f6a4c7deddf7bfe625ea75672946c Mon Sep 17 00:00:00 2001
From: Tahlia Richardson <3069029+tahliar@users.noreply.github.com>
Date: Fri, 30 Aug 2024 17:24:15 +1000
Subject: [PATCH 27/39] Add more info about starting the init script
---
xml/ha_bootstrap_install.xml | 91 ++++++++++++++++++++++++++++++------
1 file changed, 76 insertions(+), 15 deletions(-)
diff --git a/xml/ha_bootstrap_install.xml b/xml/ha_bootstrap_install.xml
index 10e6db8c..cc356dff 100644
--- a/xml/ha_bootstrap_install.xml
+++ b/xml/ha_bootstrap_install.xml
@@ -154,22 +154,74 @@
Start the bootstrap script:
- &prompt.root;crm cluster init
- Replace the CLUSTERNAME
- placeholder with a meaningful name, like the geographical location of your
- cluster (for example, &cluster1;).
- This is especially helpful to create a &geo; cluster later on,
- as it simplifies the identification of a site.
-
- If you need to use multicast instead of unicast (the default) for your cluster
- communication, use the option (or ).
+ You can start the script without specifying any options. This prompts you for input for
+ some settings, as described in the next steps, and uses &crmsh;'s default values for
+ other settings.
+
+
+
+ If you logged in as &rootuser;, you can run this command with no additional parameters:
+
+&prompt.root;crm cluster init
+
+
+
+ If you logged in as a sudo user without SSH agent forwarding,
+ run this command with sudo:
+
+&prompt.user;sudo crm cluster init
+
+
+
+ If you logged in as a sudo user with SSH agent forwarding enabled,
+ you must preserve the environment variable SSH_AUTH_SOCK
+ and tell the script to use your local SSH keys instead of generating keys on the node:
+
+&prompt.user;sudo --preserve-env=SSH_AUTH_SOCK crm cluster init --use-ssh-agent
+
+
- The script checks for NTP configuration and a hardware watchdog service. If required,
- it generates the public and private SSH keys used for passwordless SSH access and
- &csync; synchronization and starts the respective services.
+ Alternatively, you can specify additional options as part of the initialization command.
+ You can include multiple options in the same command. Some examples are shown below.
+ For more options, run crm cluster help init.
+
+
+ Multicast
+
+
+ Unicast is the default transport type for cluster communication. To use multicast
+ instead, use the option (or ).
+ For example:
+
+&prompt.root;crm cluster init --multicast
+
+
+
+ SBD disks
+
+
+ In a later step, the script asks if you want to set up SBD and prompts you for a disk
+ to use. To configure the cluster with multiple SBD disks, use the option
+ (or ) multiple times. For example:
+
+&prompt.root;crm cluster init --sbd-device /dev/disk/by-id/ID1 --sbd-device /dev/disk/by-id/ID2
+
+
+
+ Network interfaces
+
+
+ In a later step, the script prompts you for a network interface for &corosync; to use.
+ To configure the cluster with two network interfaces, use the option
+ (or ) twice. For example:
+
+&prompt.root;crm cluster init --interface eth0 --interface eth1
+
+
+
@@ -206,6 +258,13 @@
+
+
+ Enter a name for the cluster. Choose a meaningful name, like the geographical location
+ of the cluster (for example, &cluster1;). This is especially helpful
+ if you create a &geo; cluster later, as it simplifies the identification of a site.
+
+ Configure a virtual IP address for cluster administration with &hawk2;:
@@ -229,9 +288,11 @@
- Finally, the script will start the cluster services to bring the
- cluster online and enable &hawk2;. The URL to use for &hawk2; is
- displayed on the screen.
+ The script checks for NTP configuration and a hardware watchdog service. If required,
+ it generates the public and private SSH keys used for passwordless SSH access and
+ &csync; synchronization and starts the respective services. Finally, the script
+ starts the cluster services to bring the cluster online and enables &hawk2;.
+ The URL to use for &hawk2; is displayed on the screen.
From 6c885c4e9b9e0e79eda0d02168da73867c11723a Mon Sep 17 00:00:00 2001
From: Tahlia Richardson <3069029+tahliar@users.noreply.github.com>
Date: Fri, 6 Sep 2024 17:16:36 +1000
Subject: [PATCH 28/39] Further expand crm cluster init procedure
---
xml/ha_bootstrap_install.xml | 136 ++++++++++++++++++++++++++---------
1 file changed, 104 insertions(+), 32 deletions(-)
diff --git a/xml/ha_bootstrap_install.xml b/xml/ha_bootstrap_install.xml
index cc356dff..6a655b96 100644
--- a/xml/ha_bootstrap_install.xml
+++ b/xml/ha_bootstrap_install.xml
@@ -188,6 +188,21 @@
For more options, run crm cluster help init.
+
+ Cluster name
+
+
+ The default cluster name is hacluster. To choose a different name,
+ use the option (or ). For example:
+
+&prompt.root;crm cluster init --name &cluster1;
+
+ Choose a meaningful name, like the geographical location of the cluster. This is
+ especially helpful if you create a &geo; cluster later, as it simplifies the
+ identification of a site.
+
+
+ Multicast
@@ -214,11 +229,26 @@
Network interfaces
- In a later step, the script prompts you for a network interface for &corosync; to use.
+ In a later step, the script prompts you for a network address for &corosync; to use.
To configure the cluster with two network interfaces, use the option
(or ) twice. For example:
&prompt.root;crm cluster init --interface eth0 --interface eth1
+ TODO: This and -M seem to do the same thing. What's the difference?
+
+
+
+ Redundant communication channel
+
+
+ Supported clusters must have two communication channels. The preferred method is to
+ use network device bonding. If you cannot use bonding, the alternative is to set up
+ a redundant communication channel in &corosync;. By default, the script prompts you
+ for a network address for a single communication channel. To configure the cluster
+ with two communication channels, use the option
+ (or ). For example:
+
+&prompt.root;crm cluster init --multi-heartbeats
@@ -241,50 +271,92 @@
Accept the proposed port (5405) or enter a different one.
-
-
-
-
- Set up SBD as the node fencing mechanism:
-
-
- Confirm with y that you want to use SBD.
-
- Enter a persistent path to the partition of your block device that
- you want to use for SBD.
- The path must be consistent across all nodes in the cluster.
- The script creates a small partition on the device to be used for SBD.
+
+ TODO: Better words. If you used -M or -i twice, enter a second network address and port.
+
- Enter a name for the cluster. Choose a meaningful name, like the geographical location
- of the cluster (for example, &cluster1;). This is especially helpful
- if you create a &geo; cluster later, as it simplifies the identification of a site.
+ Choose whether to set up SBD as the node fencing mechanism. If you are using a different
+ fencing mechanism or want to set up SBD later, enter n to skip this step.
+ To continue with this step, enter y.
+
+
+ Select the type of SBD to use:
+
+
+
+ To use diskless SBD, enter none.
+
+
+
+
+ To use disk-based SBD, enter a persistent path to the partition of the block device you
+ want to use. The path must be consistent across all nodes in the cluster, for example,
+ /dev/disk/by-id/ID.
+
+
+ The script creates a small partition on the device to be used for SBD.
+
+
+
- Configure a virtual IP address for cluster administration with &hawk2;:
-
-
- Confirm with y that you want to configure a
- virtual IP address.
-
- Enter an unused IP address that you want to use as administration IP
- for &hawk2;: &subnetI;.10
-
- Instead of logging in to an individual cluster node with &hawk2;,
- you can connect to the virtual IP address.
-
-
+
+ Choose whether to configure a virtual IP address for cluster administration with &hawk2;.
+ Instead of logging in to an individual cluster node with &hawk2;, you can connect
+ to the virtual IP address.
+
+
+ If you choose y, enter an unused IP address to use for &hawk2;.
+
- Choose whether to configure &qdevice; and &qnet;. For the minimal setup
- described in this document, decline with n for now.
+ Choose whether to configure &qdevice; and &qnet;. If you have not set up the &qnet; server
+ yet, enter n to skip this step and set up &qdevice; and &qnet; later.
+ If you choose y, provide the following information:
+
+
+
+ Enter the host name or IP address of the &qnet; server.
+
+
+ For the remaining fields, you can accept the default values or change them as required:
+
+
+
+
+ Accept the proposed port (5403) or enter a different one.
+
+
+
+
+ Choose the algorithm that determines how votes are assigned.
+
+
+
+
+ Choose the method to use when a tie-breaker is required.
+
+
+
+
+ Choose whether to enable TLS. TODO: More info on the options, see that other bug.
+
+
+
+
+ Enter heuristics commands to affect how votes are determined. To skip this step, leave
+ the field blank.
+
+
+
From 5dc8088440308702abaee86ce81ace820b670c11 Mon Sep 17 00:00:00 2001
From: Tahlia Richardson <3069029+tahliar@users.noreply.github.com>
Date: Tue, 10 Sep 2024 15:10:01 +1000
Subject: [PATCH 29/39] Fill out crm cluster init more
---
xml/ha_bootstrap_install.xml | 56 ++++++++++++++++++------------------
1 file changed, 28 insertions(+), 28 deletions(-)
diff --git a/xml/ha_bootstrap_install.xml b/xml/ha_bootstrap_install.xml
index 6a655b96..9d1a7bd6 100644
--- a/xml/ha_bootstrap_install.xml
+++ b/xml/ha_bootstrap_install.xml
@@ -195,7 +195,7 @@
The default cluster name is hacluster. To choose a different name,
use the option (or ). For example:
-&prompt.root;crm cluster init --name &cluster1;
+&prompt.root;crm cluster init --name CLUSTERNAME
Choose a meaningful name, like the geographical location of the cluster. This is
especially helpful if you create a &geo; cluster later, as it simplifies the
@@ -225,30 +225,25 @@
&prompt.root;crm cluster init --sbd-device /dev/disk/by-id/ID1 --sbd-device /dev/disk/by-id/ID2
-
- Network interfaces
-
-
- In a later step, the script prompts you for a network address for &corosync; to use.
- To configure the cluster with two network interfaces, use the option
- (or ) twice. For example:
-
-&prompt.root;crm cluster init --interface eth0 --interface eth1
- TODO: This and -M seem to do the same thing. What's the difference?
-
- Redundant communication channel
Supported clusters must have two communication channels. The preferred method is to
- use network device bonding. If you cannot use bonding, the alternative is to set up
- a redundant communication channel in &corosync;. By default, the script prompts you
- for a network address for a single communication channel. To configure the cluster
- with two communication channels, use the option
- (or ). For example:
+ use network device bonding. If you cannot use bonding, you can set up a redundant
+ communication channel in &corosync; (also known as a second ring or heartbeat line).
+ By default, the script prompts you for a network address for a single ring.
+ To configure the cluster with two rings, use the option
+ (or ) twice. For example:
-&prompt.root;crm cluster init --multi-heartbeats
+&prompt.root;crm cluster init --interface eth0 --interface eth1
+
+
+ You can also use (or ) to set
+ up a second &corosync; ring. This option uses the first two network interfaces by
+ default, whereas allows you to specify any two network interfaces.
+
+
@@ -273,7 +268,9 @@
- TODO: Better words. If you used -M or -i twice, enter a second network address and port.
+ If you started the script with an option that configures a redundant communication channel,
+ enter y to accept a second heartbeat line, then either accept the
+ proposed network address and port or enter different ones.
@@ -282,10 +279,9 @@
Choose whether to set up SBD as the node fencing mechanism. If you are using a different
fencing mechanism or want to set up SBD later, enter n to skip this step.
- To continue with this step, enter y.
- Select the type of SBD to use:
+ If you chose y, select the type of SBD to use:
@@ -312,19 +308,23 @@
to the virtual IP address.
- If you choose y, enter an unused IP address to use for &hawk2;.
+ If you chose y, enter an unused IP address to use for &hawk2;.
- Choose whether to configure &qdevice; and &qnet;. If you have not set up the &qnet; server
- yet, enter n to skip this step and set up &qdevice; and &qnet; later.
- If you choose y, provide the following information:
+ Choose whether to configure &qdevice; and &qnet;. If you do not need to use &qdevice; or
+ have not set up the &qnet; server yet, enter n to skip this step.
+ You can set up &qdevice; and &qnet; later if required.
+
+
+ If you chose y, provide the following information:
- Enter the host name or IP address of the &qnet; server.
+ Enter the host name or IP address of the &qnet; server. The cluster node must have
+ SSH access to this server to complete the configuration.
For the remaining fields, you can accept the default values or change them as required:
@@ -347,7 +347,7 @@
- Choose whether to enable TLS. TODO: More info on the options, see that other bug.
+ Choose whether to enable TLS.
From 1a31940355faccd486682bc18763086aa48090fb Mon Sep 17 00:00:00 2001
From: Tahlia Richardson <3069029+tahliar@users.noreply.github.com>
Date: Tue, 10 Sep 2024 15:34:24 +1000
Subject: [PATCH 30/39] Move hacluster password warning earlier
---
xml/ha_bootstrap_install.xml | 23 ++++++++++++++++-------
1 file changed, 16 insertions(+), 7 deletions(-)
diff --git a/xml/ha_bootstrap_install.xml b/xml/ha_bootstrap_install.xml
index 9d1a7bd6..c1a5cc8b 100644
--- a/xml/ha_bootstrap_install.xml
+++ b/xml/ha_bootstrap_install.xml
@@ -366,6 +366,16 @@
starts the cluster services to bring the cluster online and enables &hawk2;.
The URL to use for &hawk2; is displayed on the screen.
+
+ Secure password for hacluster
+
+ The crm cluster init script creates a default user
+ (hacluster) and password
+ (linux). Replace the default password with a secure one
+ as soon as possible:
+
+&prompt.root;passwd hacluster
+
@@ -403,16 +413,15 @@
- On the &hawk2; login screen, enter the Username and
- Password of the user that was created by the bootstrap script
- (user hacluster, password linux).
+ On the &hawk2; login screen, enter the Username of the user that was
+ created by the bootstrap script (hacluster)
+ and the secure Password that you changed from the bootstrap script's
+ default password.
-
- Secure password
+
- Replace the default password with a secure one as soon as possible:
+ If you have not already changed the default password to a secure one, do so now.
-&prompt.root;passwd hacluster
From c5bc6b8244758a2a1bc50e7e8134f3070f7d0a6e Mon Sep 17 00:00:00 2001
From: Tahlia Richardson <3069029+tahliar@users.noreply.github.com>
Date: Fri, 13 Sep 2024 16:54:58 +1000
Subject: [PATCH 31/39] Move back to Admin Guide
For SLE 15, it just isn't viable to have a new guide.
A lot of the content is already in the Admin Guide
and would need to be either duplicated or linked to.
Reusing content is currently not simple.
For SLE 16, we can revisit having a separate guide.
---
DC-SLE-HA-deployment | 25 ----
xml/MAIN.SLEHA.xml | 3 -
xml/book_administration.xml | 16 +++
xml/ha_bootstrap_install.xml | 4 +-
xml/ha_install_intro.xml | 30 -----
xml/ha_sbd_watchdog.xml | 216 -----------------------------------
6 files changed, 19 insertions(+), 275 deletions(-)
delete mode 100644 DC-SLE-HA-deployment
delete mode 100644 xml/ha_install_intro.xml
delete mode 100644 xml/ha_sbd_watchdog.xml
diff --git a/DC-SLE-HA-deployment b/DC-SLE-HA-deployment
deleted file mode 100644
index 0f0b4c94..00000000
--- a/DC-SLE-HA-deployment
+++ /dev/null
@@ -1,25 +0,0 @@
-## ----------------------------
-## Doc Config File for SUSE Linux Enterprise High Availability Extension
-## Full installation guide
-## ----------------------------
-##
-## Basics
-MAIN="MAIN.SLEHA.xml"
-ROOTID=book-deployment
-
-## Profiling
-PROFOS="sles"
-PROFCONDITION="suse-product"
-
-## stylesheet location
-STYLEROOT="/usr/share/xml/docbook/stylesheet/suse2022-ns"
-FALLBACK_STYLEROOT="/usr/share/xml/docbook/stylesheet/suse-ns"
-
-## enable sourcing
-export DOCCONF=$BASH_SOURCE
-
-##do not show remarks directly in the (PDF) text
-#XSLTPARAM="--param use.xep.annotate.pdf=0"
-
-### Sort the glossary
-XSLTPARAM="--param glossary.sort=1"
diff --git a/xml/MAIN.SLEHA.xml b/xml/MAIN.SLEHA.xml
index 93b07e89..b452e831 100644
--- a/xml/MAIN.SLEHA.xml
+++ b/xml/MAIN.SLEHA.xml
@@ -42,9 +42,6 @@
-
-
-
diff --git a/xml/book_administration.xml b/xml/book_administration.xml
index 819f3fd7..6eb0abd6 100644
--- a/xml/book_administration.xml
+++ b/xml/book_administration.xml
@@ -55,6 +55,22 @@
+
+
+
+
+ Installation and setup
+
+
+
+
+
+
+
+
+
+
+
diff --git a/xml/ha_bootstrap_install.xml b/xml/ha_bootstrap_install.xml
index c1a5cc8b..f409e7da 100644
--- a/xml/ha_bootstrap_install.xml
+++ b/xml/ha_bootstrap_install.xml
@@ -16,7 +16,9 @@
-
+ &productname; includes bootstrap scripts to simplify the installation of a cluster.
+ You can use these scripts to set up the cluster on the first node, add more nodes to the
+ cluster, remove nodes from the cluster, and adjust certain settings in an existing cluster.
diff --git a/xml/ha_install_intro.xml b/xml/ha_install_intro.xml
deleted file mode 100644
index dd655a3d..00000000
--- a/xml/ha_install_intro.xml
+++ /dev/null
@@ -1,30 +0,0 @@
-
-
- %entities;
-]>
-
-
- Preface
-
-
-
- editing
-
-
- yes
-
-
-
-
-
-
-
-
-
-
-
diff --git a/xml/ha_sbd_watchdog.xml b/xml/ha_sbd_watchdog.xml
deleted file mode 100644
index df3848d7..00000000
--- a/xml/ha_sbd_watchdog.xml
+++ /dev/null
@@ -1,216 +0,0 @@
-
-
-
- %entities;
-]>
-
-
- Setting up a watchdog for SBD
-
-
-
- If you are using SBD as your &stonith; device, you must enable a watchdog on each
- cluster node. If you are using a different &stonith; device, you can skip this chapter.
-
-
-
-
- yes
-
-
-
-
-
-
- &productname; ships with several kernel modules that provide hardware-specific watchdog drivers.
- For clusters in production environments, we recommend using a hardware watchdog.
- However, if no watchdog matches your hardware, the software watchdog
- (softdog) can be used instead.
-
-
- &productname; uses the SBD daemon as the software component that feeds the watchdog.
-
-
-
- Using a hardware watchdog
-
- Finding the right watchdog kernel module for a given system is not
- trivial. Automatic probing fails often. As a result, many modules
- are already loaded before the right one gets a chance.
-
- The following table lists some commonly used watchdog drivers. However, this is
- not a complete list of supported drivers. If your hardware is not listed here,
- you can also find a list of choices in the following directories:
-
-
-
-
- /lib/modules/KERNEL_VERSION/kernel/drivers/watchdog
-
-
-
-
- /lib/modules/KERNEL_VERSION/kernel/drivers/ipmi
-
-
-
-
- Alternatively, ask your hardware or
- system vendor for details on system-specific watchdog configuration.
-
-
- Commonly used watchdog drivers
-
-
-
- Hardware
- Driver
-
-
-
-
- HP
- hpwdt
-
-
- Dell, Lenovo (Intel TCO)
- iTCO_wdt
-
-
- Fujitsu
- ipmi_watchdog
-
-
- LPAR on IBM Power
- pseries-wdt
-
-
- VM on IBM z/VM
- vmwatchdog
-
-
- Xen VM (DomU)
- xen_xdt
-
-
- VM on VMware vSphere
- wdat_wdt
-
-
- Generic
- softdog
-
-
-
-
-
- Accessing the watchdog timer
-
- Some hardware vendors ship systems management software that uses the
- watchdog for system resets (for example, HP ASR daemon). If the watchdog is
- used by SBD, disable such software. No other software must access the
- watchdog timer.
-
-
-
- Loading the correct kernel module
-
-
- List the drivers that are installed with your kernel version:
-
-&prompt.root;rpm -ql kernel-VERSION | grep watchdog
-
-
-
- List any watchdog modules that are currently loaded in the kernel:
-
-&prompt.root;lsmod | egrep "(wd|dog)"
-
-
-
- If you get a result, unload the wrong module:
-
-&prompt.root;rmmod WRONG_MODULE
-
-
-
- Enable the watchdog module that matches your hardware:
-
-&prompt.root;echo WATCHDOG_MODULE > /etc/modules-load.d/watchdog.conf
-&prompt.root;systemctl restart systemd-modules-load
-
-
-
- Test whether the watchdog module is loaded correctly:
-
-&prompt.root;lsmod | grep dog
-
-
-
- Verify if the watchdog device is available:
-
-&prompt.root;ls -l /dev/watchdog*
-&prompt.root;sbd query-watchdog
-
- If the watchdog device is not available, check the module name and options.
- Maybe use another driver.
-
-
-
-
- Verify if the watchdog device works:
-
-&prompt.root;sbd -w WATCHDOG_DEVICE test-watchdog
-
-
-
- Reboot your machine to make sure there are no conflicting kernel modules. For example,
- if you find the message cannot register ... in your log, this would indicate
- such conflicting modules. To ignore such modules, refer to
- .
-
-
-
-
-
-
- Using the software watchdog (softdog)
-
- For clusters in production environments, we recommend using a hardware-specific watchdog
- driver. However, if no watchdog matches your hardware,
- softdog can be used instead.
-
-
- Softdog limitations
-
- The softdog driver assumes that at least one CPU is still running. If all CPUs are stuck,
- the code in the softdog driver that should reboot the system is never executed.
- In contrast, hardware watchdogs keep working even if all CPUs are stuck.
-
-
-
- Loading the softdog kernel module
-
-
- Enable the softdog watchdog:
-
-&prompt.root;echo softdog > /etc/modules-load.d/watchdog.conf
-&prompt.root;systemctl restart systemd-modules-load
-
-
-
- Check whether the softdog watchdog module is loaded correctly:
-
-&prompt.root;lsmod | grep softdog
-
-
-
-
-
From 438571d010653863efc5901ce5d506e39e5c5619 Mon Sep 17 00:00:00 2001
From: Tahlia Richardson <3069029+tahliar@users.noreply.github.com>
Date: Fri, 20 Sep 2024 17:12:03 +1000
Subject: [PATCH 32/39] Remove duplicate Hawk2 procedure
---
xml/ha_bootstrap_install.xml | 73 +++++-------------------------------
1 file changed, 10 insertions(+), 63 deletions(-)
diff --git a/xml/ha_bootstrap_install.xml b/xml/ha_bootstrap_install.xml
index f409e7da..bb646ca9 100644
--- a/xml/ha_bootstrap_install.xml
+++ b/xml/ha_bootstrap_install.xml
@@ -368,6 +368,9 @@
starts the cluster services to bring the cluster online and enables &hawk2;.
The URL to use for &hawk2; is displayed on the screen.
+
+ To log in to &hawk2;, see .
+ Secure password for hacluster
@@ -380,69 +383,6 @@
-
-
- Logging in to the &hawk2; web interface
-
- You now have a running one-node cluster. To view its status, proceed as follows:
-
-
- Logging in to the &hawk2; Web interface
-
-
- On any machine, start a Web browser and make sure that JavaScript and cookies are enabled.
-
-
-
-
- As URL, enter the virtual IP address that you configured with the bootstrap script:
-
-https://VIRTUAL_IP:7630/
-
- Certificate warning
-
- If a certificate warning appears when you try to access the URL for the first time,
- a self-signed certificate is in use. Self-signed certificates are not considered
- trustworthy by default.
-
-
- Ask your cluster operator for the certificate details to verify the certificate.
-
-
- To proceed anyway, you can add an exception in the browser to bypass the warning.
-
-
-
-
-
- On the &hawk2; login screen, enter the Username of the user that was
- created by the bootstrap script (hacluster)
- and the secure Password that you changed from the bootstrap script's
- default password.
-
-
-
- If you have not already changed the default password to a secure one, do so now.
-
-
-
-
-
- Click Log In. The &hawk2; Web interface shows the
- Status screen by default:
-
-
-
-
-
-
Adding nodes with crm cluster join
@@ -520,4 +460,11 @@ crm cluster join --use-ssh-agent -c USER@StatusNodes.
+
+
+ Modifying the cluster with crm cluster init stages
+
+ TO DO
+
+
From 8bd8a5ebb887fbfdcd17d125fec9e9bf5dcfb7e6 Mon Sep 17 00:00:00 2001
From: Tahlia Richardson <3069029+tahliar@users.noreply.github.com>
Date: Wed, 2 Oct 2024 17:13:41 +1000
Subject: [PATCH 33/39] Add section for crm cluster remove
---
xml/ha_bootstrap_install.xml | 57 ++++++++++++++++++++++++++++--------
1 file changed, 44 insertions(+), 13 deletions(-)
diff --git a/xml/ha_bootstrap_install.xml b/xml/ha_bootstrap_install.xml
index bb646ca9..ef18926b 100644
--- a/xml/ha_bootstrap_install.xml
+++ b/xml/ha_bootstrap_install.xml
@@ -12,7 +12,7 @@
xmlns="http://docbook.org/ns/docbook" version="5.1"
xmlns:xi="http://www.w3.org/2001/XInclude"
xmlns:xlink="http://www.w3.org/1999/xlink">
-
Using the bootstrap script
+ Using the bootstrap scripts
@@ -158,7 +158,7 @@
You can start the script without specifying any options. This prompts you for input for
- some settings, as described in the next steps, and uses &crmsh;'s default values for
+ certain settings as described in later steps, and uses &crmsh;'s default values for
other settings.
@@ -225,6 +225,11 @@
(or ) multiple times. For example:
&prompt.root;crm cluster init --sbd-device /dev/disk/by-id/ID1 --sbd-device /dev/disk/by-id/ID2
+
+ This option is also useful because you can use tab completion for the device ID,
+ which is not available later when the script prompts you for the path.
+
+
@@ -352,7 +357,7 @@
Choose whether to enable TLS.
-
+
Enter heuristics commands to affect how votes are determined. To skip this step, leave
the field blank.
@@ -430,27 +435,25 @@ crm cluster join --use-ssh-agent -c USER@
- If NTP is not configured to start at boot time, a message
- appears. The script also checks for a hardware watchdog device.
- You are warned if none is present.
+ If NTP is not configured to start at boot time, a message appears. The script also checks
+ for a hardware watchdog device. You are warned if none is present.
- If you did not already specify the first cluster node
- with , you will be prompted for its IP address.
+ If you did not already specify the first cluster node with ,
+ you are prompted for its IP address.
If you did not already configure passwordless SSH access between the cluster nodes,
- you will be prompted for the password of the first node.
+ you are prompted for the password of the first node.
- After logging in to the specified node, the script copies the
- &corosync; configuration, configures SSH and &csync;,
- brings the current machine online as a new cluster node, and
- starts the service needed for &hawk2;.
+ After logging in to the specified node, the script copies the &corosync; configuration,
+ configures SSH and &csync;, brings the current machine online as a new cluster node,
+ and starts the service needed for &hawk2;.
@@ -467,4 +470,32 @@ crm cluster join --use-ssh-agent -c USER@
+
+
+ Removing nodes with crm cluster remove
+
+ You can remove nodes from the cluster with the crm cluster remove
+ bootstrap script.
+
+
+ If you run crm cluster remove with no additional parameters, you are
+ prompted for the IP address or host name of the node to remove. Alternatively, you can
+ specify the node when you run the command:
+
+&prompt.root;crm cluster remove NODE
+
+ On the specified node, this stops all cluster services and removes the local cluster
+ configuration files. On the rest of the cluster nodes, the specified node is removed
+ from the cluster configuration.
+
+
+ In most cases, you must run crm cluster remove from a different node,
+ not from the node you want to remove. However, to remove the last node
+ and delete the cluster, you can use (or ):
+
+&prompt.root;crm cluster remove --force LASTNODE
+
+ For more information, run crm cluster help remove.
+
+
From 49bddec237cbb7ee818815863e428732a1312009 Mon Sep 17 00:00:00 2001
From: Tahlia Richardson <3069029+tahliar@users.noreply.github.com>
Date: Thu, 3 Oct 2024 15:57:34 +1000
Subject: [PATCH 34/39] Fix help command
---
xml/ha_bootstrap_install.xml | 11 ++---------
1 file changed, 2 insertions(+), 9 deletions(-)
diff --git a/xml/ha_bootstrap_install.xml b/xml/ha_bootstrap_install.xml
index ef18926b..d4e2df2e 100644
--- a/xml/ha_bootstrap_install.xml
+++ b/xml/ha_bootstrap_install.xml
@@ -187,7 +187,7 @@
Alternatively, you can specify additional options as part of the initialization command.
You can include multiple options in the same command. Some examples are shown below.
- For more options, run crm cluster help init.
+ For more options, run crm cluster init --help.
@@ -464,13 +464,6 @@ crm cluster join --use-ssh-agent -c USER@
-
- Modifying the cluster with crm cluster init stages
-
- TO DO
-
-
-
Removing nodes with crm cluster remove
@@ -495,7 +488,7 @@ crm cluster join --use-ssh-agent -c USER@&prompt.root;crm cluster remove --force LASTNODE
- For more information, run crm cluster help remove.
+ For more information, run crm cluster remove --help.
From ed313d3856e3813fcd42003df4b9d8e9b8270501 Mon Sep 17 00:00:00 2001
From: Tahlia Richardson <3069029+tahliar@users.noreply.github.com>
Date: Thu, 3 Oct 2024 16:48:15 +1000
Subject: [PATCH 35/39] Fix pattern name
---
xml/ha_requirements.xml | 4 +-
xml/html/rh-book-administration.html | 85 ----------------------------
2 files changed, 2 insertions(+), 87 deletions(-)
delete mode 100644 xml/html/rh-book-administration.html
diff --git a/xml/ha_requirements.xml b/xml/ha_requirements.xml
index c7e3d238..0a2ed5ea 100644
--- a/xml/ha_requirements.xml
+++ b/xml/ha_requirements.xml
@@ -96,7 +96,7 @@
HA Node system role
- &ha; (sles_ha)
+ &ha; (ha_sles)
Enhanced Base System (enhanced_base)
@@ -123,7 +123,7 @@
You might need to add more packages manually, if required.
For machines that originally had another system role assigned, you need to
- manually install the sles_ha or
+ manually install the ha_sles or
ha_geo patterns and any further packages that you
need.
diff --git a/xml/html/rh-book-administration.html b/xml/html/rh-book-administration.html
deleted file mode 100644
index 47bbfc19..00000000
--- a/xml/html/rh-book-administration.html
+++ /dev/null
@@ -1,85 +0,0 @@
-
-Revision History: Administration Guide
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Revision History: Administration Guide
2024-06-26
-
- Updated for the initial release of SUSE Linux Enterprise High Availability 15 SP6.
-