test_monitoring_after_rebooting_node_where_mgr_is_running failed to verify ceph health after node reboot #10712

Open · nagendra202 opened this issue Oct 22, 2024 · 1 comment

self = <tests.functional.workloads.ocp.monitoring.test_monitoring_on_negative_scenarios.TestMonitoringBackedByOCS object at 0x7fdcb926a5b0>
nodes = <ocs_ci.ocs.platform_nodes.VMWareUPINodes object at 0x7fdce666b5e0>
pods = [<ocs_ci.ocs.resources.pod.Pod object at 0x7fdcd5b3e8e0>, <ocs_ci.ocs.resources.pod.Pod object at 0x7fdceb242130>, <oc...790>, <ocs_ci.ocs.resources.pod.Pod object at 0x7fdcb6277fa0>, <ocs_ci.ocs.resources.pod.Pod object at 0x7fdca2894e50>]
threading_lock = <unlocked _thread.RLock object owner=0 count=0 at 0x7fdd463923c0>

```python
@pytest.mark.polarion_id("OCS-710")
def test_monitoring_after_rebooting_node_where_mgr_is_running(
    self, nodes, pods, threading_lock
):
    """
    Test case to validate rebooting a node where mgr is running
    should not delete the data collected on prometheus pod

    """

    # Get the mgr pod obj
    mgr_pod_obj = pod.get_mgr_pods()

    # Get the node where the mgr pod is hosted
    mgr_node_obj = pod.get_pod_node(mgr_pod_obj[0])

    # Reboot the node where the mgr pod is hosted
    nodes.restart_nodes([mgr_node_obj])

    # Validate all nodes are in READY state
    retry((CommandFailed, ResourceWrongStatusException), tries=20, delay=15)(
        wait_for_nodes_status()
    )

    # Check for Ceph pods
    pod_obj = ocp.OCP(
        kind=constants.POD, namespace=config.ENV_DATA["cluster_namespace"]
    )
    assert pod_obj.wait_for_resource(
        condition="Running", selector="app=rook-ceph-mgr", timeout=600
    )
    assert pod_obj.wait_for_resource(
        condition="Running",
        selector="app=rook-ceph-mon",
        resource_count=3,
        timeout=600,
    )
    assert pod_obj.wait_for_resource(
        condition="Running",
        selector="app=rook-ceph-osd",
        resource_count=3,
        timeout=600,
    )

    # Check the nodes are in Ready state and the cluster health is OK
    self.sanity_helpers.health_check(tries=40)

    # Check that the ceph health check metrics are updated with the new mgr pod
    wait_to_update_mgrpod_info_prometheus_pod(threading_lock)
```
tests/functional/workloads/ocp/monitoring/test_monitoring_on_negative_scenarios.py:503:

ocs_ci/utility/retry.py:49: in f_retry
    return f(*args, **kwargs)

threading_lock = <unlocked _thread.RLock object owner=0 count=0 at 0x7fdd463923c0>

```python
@retry(AssertionError, tries=30, delay=3, backoff=1)
def wait_to_update_mgrpod_info_prometheus_pod(threading_lock):
    """
    Validates that the ceph health metrics are updated on the prometheus pod

    Args:
        threading_lock (threading.RLock): A lock to ensure only one thread is making the 'oc' calls

    """

    log.info("Verifying ceph health status metrics is updated after rebooting the node")
    ocp_obj = ocp.OCP(
        kind=constants.POD, namespace=config.ENV_DATA["cluster_namespace"]
    )
    mgr_pod = (
        ocp_obj.get(selector=constants.MGR_APP_LABEL)
        .get("items")[0]
        .get("metadata")
        .get("name")
    )

    assert check_ceph_health_status_metrics_on_prometheus(
        mgr_pod=mgr_pod, threading_lock=threading_lock
    ), "Ceph health status metrics are not updated after the rebooting node where the mgr running"
E   AssertionError: Ceph health status metrics are not updated after the rebooting node where the mgr running
E   assert False
E    +  where False = check_ceph_health_status_metrics_on_prometheus(mgr_pod='rook-ceph-mgr-a-5d54947b9c-scwpp', threading_lock=<unlocked _thread.RLock object owner=0 count=0 at 0x7fdd463923c0>)
```

tests/functional/workloads/ocp/monitoring/test_monitoring_on_negative_scenarios.py:65: AssertionError
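
The assertion that fails is check_ceph_health_status_metrics_on_prometheus: after the reboot, Prometheus is not (or not yet) reporting ceph_health_status from the new mgr pod rook-ceph-mgr-a-5d54947b9c-scwpp. Roughly the same check can be run by hand against the Prometheus API to see whether the metric is missing entirely or still attributed to the old mgr pod. The snippet below is only an illustrative sketch, not the ocs_ci helper itself; the Prometheus URL, the token handling, and the `pod` label name are assumptions:

```python
# Illustrative sketch (not ocs_ci code): query Prometheus directly and check
# which pod(s) currently report the ceph_health_status metric.
import requests

PROM_URL = "https://<prometheus-route>"  # assumption: e.g. the openshift-monitoring route
TOKEN = "<serviceaccount-token>"         # assumption: a token with access to the monitoring API
MGR_POD = "rook-ceph-mgr-a-5d54947b9c-scwpp"  # mgr pod name taken from the failure above

resp = requests.get(
    f"{PROM_URL}/api/v1/query",
    params={"query": "ceph_health_status"},
    headers={"Authorization": f"Bearer {TOKEN}"},
    verify=False,
)
resp.raise_for_status()
results = resp.json()["data"]["result"]

# The test expects the metric to be exposed by the new mgr pod; the "pod"
# label name is an assumption about how the scrape target is labelled.
reporting_pods = {sample["metric"].get("pod", "") for sample in results}
print("ceph_health_status reported by:", reporting_pods)
print("new mgr pod present:", MGR_POD in reporting_pods)
```

If the metric is present but only under the old mgr pod name, the failure looks like a scrape/target refresh delay rather than lost Prometheus data.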

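The helper polls with @retry(AssertionError, tries=30, delay=3, backoff=1), i.e. a constant 3 s delay for roughly 90 seconds in total. A node reboot plus mgr failover plus the next Prometheus scrape can plausibly take longer than that. If this turns out to be a pure timing issue, one possible mitigation, sketched below and not verified against this run, is to widen that window (the tries/delay values are guesses; the import path follows the retry module shown in the traceback):

```python
# Sketch only: widen the polling window of the existing helper.
from ocs_ci.utility.retry import retry


@retry(AssertionError, tries=60, delay=5, backoff=1)  # was tries=30, delay=3 (~90 s)
def wait_to_update_mgrpod_info_prometheus_pod(threading_lock):
    ...  # body unchanged from the version quoted above
```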