diff --git a/deploy/csi-daemonset.yaml b/deploy/csi-daemonset.yaml index 93a380d1b..b0f3271d8 100644 --- a/deploy/csi-daemonset.yaml +++ b/deploy/csi-daemonset.yaml @@ -30,7 +30,7 @@ spec: # the same. containers: - name: mayastor-csi - image: mayadata/mayastor:v1.0.5 + image: mayadata/mayastor:v1.0.7 imagePullPolicy: IfNotPresent # we need privileged because we mount filesystems and use mknod securityContext: diff --git a/deploy/mayastor-daemonset.yaml b/deploy/mayastor-daemonset.yaml index 7525053e8..d40f43716 100644 --- a/deploy/mayastor-daemonset.yaml +++ b/deploy/mayastor-daemonset.yaml @@ -33,7 +33,7 @@ spec: command: ['sh', '-c', 'until nc -vz nats 4222; do echo "Waiting for message bus..."; sleep 1; done;'] containers: - name: mayastor - image: mayadata/mayastor:v1.0.5 + image: mayadata/mayastor:v1.0.7 imagePullPolicy: IfNotPresent env: - name: RUST_LOG diff --git a/mayastor/src/bdev/nexus/nexus_bdev.rs b/mayastor/src/bdev/nexus/nexus_bdev.rs index 0df12fa59..0ef7b6957 100644 --- a/mayastor/src/bdev/nexus/nexus_bdev.rs +++ b/mayastor/src/bdev/nexus/nexus_bdev.rs @@ -983,9 +983,9 @@ impl<'n> Nexus<'n> { ) -> Result<(), Error> { self.child_retire_for_each_channel(Some(name.clone())) .await?; - debug!(?self, "PAUSE"); + debug!(?self, "PAUSING"); self.as_mut().pause().await?; - debug!(?self, "UNPAUSE"); + debug!(?self, "PAUSED"); if let Some(child) = self.lookup_child(&name) { let uri = child.name.clone(); // schedule the deletion of the child eventhough etcd has not been @@ -1030,6 +1030,16 @@ impl<'n> Nexus<'n> { }))) .await; } + // If we are faulted then rather than failing all IO back to the + // initiator we can instead leave the subsystem paused, and wait + // for the control-plane to do something about this. + // Meanwhile the initiator will begin its reconnect loop and won't see + // a swarm of IO failures which could cause a fs to shutdown. 
def build(self):
    """Return the fio shell command line for this job as a single string.

    ``self.device`` may be one path (a ``str``) or an iterable of paths;
    multiple paths are joined with ``:`` for fio's ``--filename`` option.
    A ``--size`` option is appended only when ``self.size`` is not None;
    otherwise the final placeholder is filled with an empty string.
    """
    if isinstance(self.device, str):
        targets = [self.device]
    else:
        targets = self.device

    filenames = ":".join(str(target) for target in targets)
    size_opt = "" if self.size is None else "--size={}".format(self.size)

    template = (
        "sudo fio --ioengine=linuxaio --direct=1 --bs=4k "
        "--time_based=1 {} --rw={} "
        "--group_reporting=1 --norandommap=1 --iodepth=64 "
        "--runtime={} --name={} --filename={} {}"
    )
    return template.format(
        self.optstr,
        self.rw,
        self.runtime,
        self.name,
        filenames,
        size_opt,
    )
"""Faulted nexus I/O management feature tests."""

import pytest
from pytest_bdd import (
    given,
    scenario,
    then,
    when,
)

import os
import subprocess
import time

from retrying import retry

from common.command import run_cmd
from common.fio import Fio
from common.mayastor import container_mod, mayastor_mod
from common.nvme import nvme_connect, nvme_disconnect

import grpc
import nexus_pb2 as pb

# UUID used for the single remote replica, both before and after the restart.
REPLICA_UUID = "c64f5e67-e979-4933-8952-741600d3792a"
# Sizes (in MiB) of the nexus and of the replica backing it.
NEXUS_SIZE_MB = 303
REPLICA_SIZE_MB = 333
# Upper bound on how long we poll for the nexus to report NEXUS_FAULTED.
# NOTE(review): 120s is a deliberately generous guess; tune to the actual
# time the nexus needs to notice the lost replica.
FAULT_TIMEOUT_SECS = 120
# Pause between polls so the gRPC endpoint is not hammered in a tight loop.
FAULT_POLL_SECS = 0.2
# How long the fio process is given to exit once the nexus is back.
FIO_TIMEOUT_SECS = 60


def megabytes(n):
    """Return ``n`` MiB expressed in bytes."""
    return n * 1024 * 1024


def _wait_for_nexus_state(
    find_nexus, uuid, state, timeout=FAULT_TIMEOUT_SECS, interval=FAULT_POLL_SECS
):
    """Poll ``find_nexus(uuid)`` until the nexus reports ``state``.

    A missing nexus (``find_nexus`` returning None) is treated as "not there
    yet" rather than dereferenced. Fails the test via ``pytest.fail`` if the
    state is not reached within ``timeout`` seconds; returns the nexus
    otherwise.
    """
    deadline = time.monotonic() + timeout
    while True:
        nexus = find_nexus(uuid)
        if nexus is not None and nexus.state == state:
            return nexus
        if time.monotonic() >= deadline:
            pytest.fail(f"Nexus {uuid} did not reach state {state} in {timeout}s")
        time.sleep(interval)


def _create_pool(mayastor_mod, instance, pool_name, disk):
    """Create ``pool_name`` on ``disk`` through the given instance and return it."""
    pool = mayastor_mod[instance].pool_create(pool_name, disk)
    print(pool)
    return pool


def _create_replica(mayastor_mod, instance, pool_name):
    """Create the test replica in ``pool_name`` through the given instance."""
    replica = mayastor_mod[instance].replica_create(
        pool_name, REPLICA_UUID, megabytes(REPLICA_SIZE_MB)
    )
    print(replica)
    return replica


def _create_nexus(mayastor_mod, instance, uuid, replica):
    """Create a single-child nexus on ``instance`` backed by ``replica``."""
    return mayastor_mod[instance].nexus_create(
        uuid=uuid,
        size=megabytes(NEXUS_SIZE_MB),
        children=[replica.uri],
    )


@scenario(
    "features/nexus_fault.feature",
    "a temporarily faulted nexus should not cause initiator filesystem to shutdown",
)
def test_a_temporarily_faulted_nexus_should_not_cause_initiator_filesystem_to_shutdown():
    """a temporarily faulted nexus should not cause initiator filesystem to shutdown."""


@given("a filesystem is placed on top of the connected device")
def _(connect_nexus_1):
    """a filesystem is placed on top of the connected device."""
    device = connect_nexus_1
    print(device)
    run_cmd(f"sudo mkfs.xfs {device}")


@given(
    "a fio workload is started on top of the mounted filesystem", target_fixture="fio"
)
def _(mounted_nexus):
    """a fio workload is started on top of the mounted filesystem."""
    fio_cmd = Fio(
        "job-raw", "randwrite", f"{mounted_nexus}/fio.io", size="200M"
    ).build()
    print(fio_cmd)
    # Started in the background; the "then" step waits for it to exit.
    yield subprocess.Popen(fio_cmd, shell=True)


@given("a local mayastor instance")
def _(local_instance):
    """a local mayastor instance."""


@given("a remote mayastor instance")
def _(remote_instance):
    """a remote mayastor instance."""


@given("a single replica (remote) nexus is published via nvmf")
def _(create_nexus):
    """a single replica (remote) nexus is published via nvmf."""
    nexus = create_nexus
    print(nexus)


@given("the filesystem is mounted", target_fixture="mounted_nexus")
def _(connect_nexus_1):
    """the filesystem is mounted."""
    dev = connect_nexus_1
    path = f"/mnt{dev}"
    run_cmd(f"sudo mkdir -p {path}")
    run_cmd(f"sudo mount {dev} {path}")
    yield path
    run_cmd(f"sudo umount {path}")


@given("the nexus is connected to a kernel initiator", target_fixture="connect_nexus_1")
def _(publish_nexus):
    """the nexus is connected to a kernel initiator."""
    yield nvme_connect(publish_nexus)
    nvme_disconnect(publish_nexus)


@when("the remote mayastor instance is restarted")
def _(container_mod, mayastor_mod, remote_instance, nexus_uuid, find_nexus):
    """the remote mayastor instance is restarted."""
    container_mod.get(remote_instance).restart()
    # Bounded, paced poll instead of a tight loop that could spin forever.
    nexus = _wait_for_nexus_state(find_nexus, nexus_uuid, pb.NexusState.NEXUS_FAULTED)
    print(nexus)

    remote_ready(mayastor_mod, remote_instance)


@when("the faulted nexus is recreated")
def _(recreate_pool, republish_nexus):
    """the faulted nexus is recreated."""


@then("the fio workload should complete gracefully")
def _(fio):
    """the fio workload should complete gracefully."""
    try:
        code = fio.wait(timeout=FIO_TIMEOUT_SECS)
    except subprocess.TimeoutExpired:
        pytest.fail("FIO timed out")
    assert code == 0, f"FIO failed, exit code: {code}"


@then("the initiator filesystem should not be shutdown")
def _(mounted_nexus):
    """the initiator filesystem should not be shutdown."""
    try:
        # xfs_info should still be working as the fs is not shutdown
        run_cmd(f"xfs_info {mounted_nexus}")
    except Exception:
        # NOTE(review): run_cmd's exact exception type is not visible here;
        # Exception (rather than a bare except) keeps KeyboardInterrupt and
        # SystemExit propagating.
        pytest.fail(f"Filesystem on {mounted_nexus} should not be shutdown")


@pytest.fixture(scope="module")
def remote_instance():
    """Name of the container that hosts the replica."""
    yield "ms0"


@pytest.fixture(scope="module")
def local_instance():
    """Name of the container that hosts the nexus."""
    yield "ms1"


@pytest.fixture
def create_nexus(mayastor_mod, nexus_uuid, create_replica, local_instance):
    """Create the nexus on the local instance on top of the remote replica."""
    yield _create_nexus(mayastor_mod, local_instance, nexus_uuid, create_replica)


@pytest.fixture
def publish_nexus(mayastor_mod, nexus_uuid, create_nexus, local_instance):
    """Publish the nexus and yield whatever ``nexus_publish`` returns."""
    yield mayastor_mod[local_instance].nexus_publish(nexus_uuid)


@pytest.fixture
def republish_nexus(mayastor_mod, nexus_uuid, local_instance, recreate_replica):
    """Destroy the faulted nexus, recreate it on the new replica and publish it."""
    mayastor_mod[local_instance].nexus_destroy(nexus_uuid)
    _create_nexus(mayastor_mod, local_instance, nexus_uuid, recreate_replica)
    yield mayastor_mod[local_instance].nexus_publish(nexus_uuid)


@pytest.fixture(scope="module")
def local_files():
    """Create the 400M backing file(s) under /tmp and remove them afterwards."""
    files = [f"/tmp/disk-{base}.img" for base in ["remote"]]
    for path in files:
        subprocess.run(
            ["sudo", "sh", "-c", f"rm -f '{path}' && truncate -s 400M '{path}'"],
            check=True,
        )
    yield files
    for path in files:
        subprocess.run(["sudo", "rm", "-f", path], check=True)


@pytest.fixture
def create_pool(mayastor_mod, pool_remote, remote_instance, local_files):
    """Create the pool on the remote instance before the restart."""
    yield _create_pool(mayastor_mod, remote_instance, pool_remote, local_files[0])


@pytest.fixture
def recreate_pool(mayastor_mod, pool_remote, remote_instance, local_files):
    """Create the pool again on the remote instance after the restart.

    Kept as a separate fixture from ``create_pool`` so that pytest runs the
    creation a second time within the same test.
    """
    yield _create_pool(mayastor_mod, remote_instance, pool_remote, local_files[0])


@pytest.fixture
def create_replica(mayastor_mod, pool_remote, remote_instance, create_pool):
    """Create the replica on the remote pool before the restart."""
    yield _create_replica(mayastor_mod, remote_instance, pool_remote)


@pytest.fixture
def recreate_replica(mayastor_mod, pool_remote, remote_instance, recreate_pool):
    """Create the replica again on the remote pool after the restart."""
    yield _create_replica(mayastor_mod, remote_instance, pool_remote)


@pytest.fixture(scope="module")
def pool_remote():
    """Name of the pool created on the remote instance."""
    yield "pool-remote"


@pytest.fixture(scope="module")
def nexus_uuid():
    """UUID used for the nexus, both before and after it is recreated."""
    yield "2c58c9f0-da89-4cb9-8097-dc67fa132493"


@pytest.fixture(scope="module")
def mayastor_instance(mayastor_mod):
    """Handle for the "ms0" instance."""
    yield mayastor_mod["ms0"]


@pytest.fixture(scope="module")
def find_nexus(mayastor_mod, local_instance):
    """Yield a lookup function: nexus UUID -> nexus on the local instance, or None."""

    def find(uuid):
        nexuses = mayastor_mod[local_instance].nexus_list()
        return next((nexus for nexus in nexuses if nexus.uuid == uuid), None)

    yield find


@retry(wait_fixed=200, stop_max_attempt_number=30)
def remote_ready(mayastor_mod, remote_instance):
    """Call ``pool_list`` on the remote instance, retrying while it raises.

    The ``retry`` decorator allows at most 30 attempts with a fixed wait of
    200 (milliseconds, per the ``retrying`` package) between them.
    """
    mayastor_mod[remote_instance].pool_list()