test_monitoring_after_rebooting_node_where_mgr_is_running failed to verify ceph health after node reboot #10712

Open · nagendra202 opened this issue Oct 22, 2024 · 1 comment

self = <tests.functional.workloads.ocp.monitoring.test_monitoring_on_negative_scenarios.TestMonitoringBackedByOCS object at 0x7fdcb926a5b0>
nodes = <ocs_ci.ocs.platform_nodes.VMWareUPINodes object at 0x7fdce666b5e0>
pods = [<ocs_ci.ocs.resources.pod.Pod object at 0x7fdcd5b3e8e0>, <ocs_ci.ocs.resources.pod.Pod object at 0x7fdceb242130>, <oc...790>, <ocs_ci.ocs.resources.pod.Pod object at 0x7fdcb6277fa0>, <ocs_ci.ocs.resources.pod.Pod object at 0x7fdca2894e50>]
threading_lock = <unlocked _thread.RLock object owner=0 count=0 at 0x7fdd463923c0>

```python
@pytest.mark.polarion_id("OCS-710")
def test_monitoring_after_rebooting_node_where_mgr_is_running(
    self, nodes, pods, threading_lock
):
    """
    Test case to validate rebooting a node where mgr is running
    should not delete the data collected on prometheus pod

    """

    # Get the mgr pod obj
    mgr_pod_obj = pod.get_mgr_pods()

    # Get the node where the mgr pod is hosted
    mgr_node_obj = pod.get_pod_node(mgr_pod_obj[0])

    # Reboot the node where the mgr pod is hosted
    nodes.restart_nodes([mgr_node_obj])

    # Validate all nodes are in READY state
    retry((CommandFailed, ResourceWrongStatusException), tries=20, delay=15)(
        wait_for_nodes_status()
    )

    # Check for Ceph pods
    pod_obj = ocp.OCP(
        kind=constants.POD, namespace=config.ENV_DATA["cluster_namespace"]
    )
    assert pod_obj.wait_for_resource(
        condition="Running", selector="app=rook-ceph-mgr", timeout=600
    )
    assert pod_obj.wait_for_resource(
        condition="Running",
        selector="app=rook-ceph-mon",
        resource_count=3,
        timeout=600,
    )
    assert pod_obj.wait_for_resource(
        condition="Running",
        selector="app=rook-ceph-osd",
        resource_count=3,
        timeout=600,
    )

    # Check the nodes are in Ready state and the cluster health is OK
    self.sanity_helpers.health_check(tries=40)

    # Check that the ceph health check metrics are updated with the new mgr pod
    wait_to_update_mgrpod_info_prometheus_pod(threading_lock)
```
tests/functional/workloads/ocp/monitoring/test_monitoring_on_negative_scenarios.py:503:

ocs_ci/utility/retry.py:49: in f_retry
    return f(*args, **kwargs)

threading_lock = <unlocked _thread.RLock object owner=0 count=0 at 0x7fdd463923c0>

```python
@retry(AssertionError, tries=30, delay=3, backoff=1)
def wait_to_update_mgrpod_info_prometheus_pod(threading_lock):
    """
    Validates that the ceph health metrics are updated on the prometheus pod

    Args:
        threading_lock (threading.RLock): A lock to ensure only one thread is making the 'oc' calls

    """

    log.info("Verifying ceph health status metrics is updated after rebooting the node")
    ocp_obj = ocp.OCP(
        kind=constants.POD, namespace=config.ENV_DATA["cluster_namespace"]
    )
    mgr_pod = (
        ocp_obj.get(selector=constants.MGR_APP_LABEL)
        .get("items")[0]
        .get("metadata")
        .get("name")
    )

    assert check_ceph_health_status_metrics_on_prometheus(
        mgr_pod=mgr_pod, threading_lock=threading_lock
    ), "Ceph health status metrics are not updated after the rebooting node where the mgr running"
E   AssertionError: Ceph health status metrics are not updated after the rebooting node where the mgr running
E   assert False
E    +  where False = check_ceph_health_status_metrics_on_prometheus(mgr_pod='rook-ceph-mgr-a-5d54947b9c-scwpp', threading_lock=<unlocked _thread.RLock object owner=0 count=0 at 0x7fdd463923c0>)
```

tests/functional/workloads/ocp/monitoring/test_monitoring_on_negative_scenarios.py:65: AssertionError
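
The assertion that fails is check_ceph_health_status_metrics_on_prometheus: after the reboot, Prometheus is not (or not yet) reporting ceph_health_status from the new mgr pod rook-ceph-mgr-a-5d54947b9c-scwpp. Roughly the same check can be run by hand against the Prometheus API to see whether the metric is missing entirely or still attributed to the old mgr pod. The snippet below is only an illustrative sketch, not the ocs_ci helper itself; the Prometheus URL, the token handling, and the `pod` label name are assumptions:

```python
# Illustrative sketch (not ocs_ci code): query Prometheus directly and check
# which pod(s) currently report the ceph_health_status metric.
import requests

PROM_URL = "https://<prometheus-route>"  # assumption: e.g. the openshift-monitoring route
TOKEN = "<serviceaccount-token>"         # assumption: a token with access to the monitoring API
MGR_POD = "rook-ceph-mgr-a-5d54947b9c-scwpp"  # mgr pod name taken from the failure above

resp = requests.get(
    f"{PROM_URL}/api/v1/query",
    params={"query": "ceph_health_status"},
    headers={"Authorization": f"Bearer {TOKEN}"},
    verify=False,
)
resp.raise_for_status()
results = resp.json()["data"]["result"]

# The test expects the metric to be exposed by the new mgr pod; the "pod"
# label name is an assumption about how the scrape target is labelled.
reporting_pods = {sample["metric"].get("pod", "") for sample in results}
print("ceph_health_status reported by:", reporting_pods)
print("new mgr pod present:", MGR_POD in reporting_pods)
```

If the metric is present but only under the old mgr pod name, the failure looks like a scrape/target refresh delay rather than lost Prometheus data.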

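The helper polls with @retry(AssertionError, tries=30, delay=3, backoff=1), i.e. a constant 3 s delay for roughly 90 seconds in total. A node reboot plus mgr failover plus the next Prometheus scrape can plausibly take longer than that. If this turns out to be a pure timing issue, one possible mitigation, sketched below and not verified against this run, is to widen that window (the tries/delay values are guesses; the import path follows the retry module shown in the traceback):

```python
# Sketch only: widen the polling window of the existing helper.
from ocs_ci.utility.retry import retry


@retry(AssertionError, tries=60, delay=5, backoff=1)  # was tries=30, delay=3 (~90 s)
def wait_to_update_mgrpod_info_prometheus_pod(threading_lock):
    ...  # body unchanged from the version quoted above
```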