Skip to content

Commit

Permalink
flap interface after sfp reset (#16375)
Browse files Browse the repository at this point in the history
Description of PR
Summary:
Fixes interface stays down after tests/platform_tests/api/test_sfp.py::sfp_reset()

And causing error during shutdown_ebgp fixture teardown.

Type of change
 Bug fix
 Testbed and Framework(new/improvement)
 Test case(new/improvement)
Back port request
 202012
 202205
 202305
 202311
 202405
 202411
Approach
What is the motivation for this PR?
keep interface up after sfp_reset if it's T2 and QSFP-DD SFP.

How did you do it?
flap the interface after sfp_reset to restore the interface state.

How did you verify/test it?
passed on physical testbed with

admin@svcstr2-8800-lc1-1:~$ sudo sfputil show eeprom -d -p Ethernet0
Ethernet0: SFP EEPROM detected
...
        Application Advertisement: 400GAUI-8 C2M (Annex 120E) - Host Assign (0x1) - Active Cable assembly with BER < 5x10^-5 - Media Assign (0x1)
                                   CAUI-4 C2M (Annex 83E) - Host Assign (0x1) - Active Cable assembly with BER < 5x10^-5 - Media Assign (0x1)
        CMIS Revision: 4.0
        Connector: No separable connector
        Encoding: N/A
        Extended Identifier: Power Class 5 (10.0W Max)
        Extended RateSelect Compliance: N/A
        Hardware Revision: 1.0
        Host Electrical Interface: 400GAUI-8 C2M (Annex 120E)
        Host Lane Assignment Options: 1
        Host Lane Count: 8
        Identifier: QSFP-DD Double Density 8X Pluggable Transceiver
        Length Cable Assembly(m): 1.0
......
platform_tests/api/test_sfp.py::TestSfpApi::test_reset[xxx-lc1-1] PASSED [ 73%]
......
=========================== short test summary info ============================
FAILED platform_tests/api/test_sfp.py::TestSfpApi::test_lpmode[svcstr2-8800-lc1-1]                        <<<< this is separate issue, not related to this PR.
========================= 1 failed, 22 passed, 1 warning in 2104.41s (0:35:04) =========================
``
============= 1 failed, 22 passed, 1 warning in 2104.41s (0:35:04) =============
Any platform specific information?
Supported testbed topology if it's a new test case?

Co-authored-by: [email protected]
  • Loading branch information
sdszhang authored Jan 17, 2025
1 parent d8e8177 commit 6814013
Show file tree
Hide file tree
Showing 6 changed files with 79 additions and 1 deletion.
28 changes: 28 additions & 0 deletions tests/common/devices/multi_asic.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from tests.common.devices.sonic_asic import SonicAsic
from tests.common.helpers.assertions import pytest_assert
from tests.common.helpers.constants import DEFAULT_ASIC_ID, DEFAULT_NAMESPACE, ASICS_PRESENT
from tests.common.platform.interface_utils import get_dut_interfaces_status

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -856,3 +857,30 @@ def ports_list(self):
"""
mg_facts = self.sonichost.minigraph_facts(host=self.sonichost.hostname)
return list(mg_facts['ansible_facts']['minigraph_ports'].keys())

def get_admin_up_ports(self):
intf_status = get_dut_interfaces_status(self.sonichost)
admin_up_ports = [k for k, v in intf_status.items() if v['admin'] == "up"]
return admin_up_ports

def shutdown_interface(self, port):
"""
This function works for both multi/single-asic dut
"""
logging.info("Shutting down {}".format(port))
if self.sonichost.is_multi_asic:
asic_ns = self.get_port_asic_instance(port).namespace
return self.command(f"sudo config interface -n {asic_ns} shutdown {port}")
else:
return self.command(f"sudo config interface shutdown {port}")

def no_shutdown_interface(self, port):
"""
This function works for both multi/single-asic dut
"""
logging.info("Bring up {}".format(port))
if self.sonichost.is_multi_asic:
asic_ns = self.get_port_asic_instance(port).namespace
return self.command(f"sudo config interface -n {asic_ns} startup {port}")
else:
return self.command(f"sudo config interface startup {port}")
6 changes: 6 additions & 0 deletions tests/common/platform/interface_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,12 @@ def parse_intf_status(lines):
return result


def get_dut_interfaces_status(duthost):
output = duthost.command("show interface description")
intf_status = parse_intf_status(output["stdout_lines"][2:])
return intf_status


def check_interface_status_of_up_ports(duthost):
if duthost.facts['asic_type'] == 'vs' and duthost.is_supervisor_node():
return True
Expand Down
2 changes: 2 additions & 0 deletions tests/common/platform/transceiver_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
import re
from copy import deepcopy

I2C_WAIT_TIME_AFTER_SFP_RESET = 5 # in seconds


def parse_transceiver_info(output_lines):
"""
Expand Down
1 change: 1 addition & 0 deletions tests/common/port_toggle.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
logger = logging.getLogger(__name__)

BASE_PORT_COUNT = 28.0 # default t0 topology has 28 ports to toggle
WAIT_TIME_AFTER_INTF_SHUTDOWN = 5 # in seconds


def port_toggle(duthost, tbinfo, ports=None, wait_time_getter=None, wait_after_ports_up=60, watch=False):
Expand Down
41 changes: 41 additions & 0 deletions tests/platform_tests/api/test_sfp.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
import ast
import logging
import pytest
import time

from tests.common.helpers.assertions import pytest_assert
from tests.common.helpers.platform_api import sfp
from tests.common.utilities import skip_release
from tests.common.utilities import skip_release_for_platform
from tests.common.platform.interface_utils import get_physical_port_indices
from tests.common.platform.interface_utils import check_interface_status_of_up_ports
from tests.common.port_toggle import default_port_toggle_wait_time, WAIT_TIME_AFTER_INTF_SHUTDOWN
from tests.common.platform.transceiver_utils import I2C_WAIT_TIME_AFTER_SFP_RESET
from tests.common.utilities import wait_until
from tests.common.fixtures.conn_graph_facts import conn_graph_facts # noqa F401
from tests.common.fixtures.duthost_utils import shutdown_ebgp # noqa F401
Expand Down Expand Up @@ -43,6 +47,7 @@ def setup(request, duthosts, enum_rand_one_per_hwsku_hostname,

# We are interested only in ports that are used for device connection
physical_intfs = conn_graph_facts["device_conn"][duthost.hostname]
sfp_setup["conn_interfaces"] = physical_intfs

physical_port_index_map = get_physical_port_indices(duthost, physical_intfs)
sfp_setup["physical_port_index_map"] = physical_port_index_map
Expand Down Expand Up @@ -682,17 +687,53 @@ def test_reset(self, request, duthosts, enum_rand_one_per_hwsku_hostname, localh
# TODO: Verify that the transceiver was actually reset
duthost = duthosts[enum_rand_one_per_hwsku_hostname]
skip_release_for_platform(duthost, ["202012"], ["arista", "mlnx"])
port_index_to_info_dict = {}

for i in self.sfp_setup["sfp_test_port_indices"]:
info_dict = sfp.get_transceiver_info(platform_api_conn, i)
if not self.expect(info_dict is not None, "Unable to retrieve transceiver {} info".format(i)):
continue
port_index_to_info_dict[i] = info_dict

ret = sfp.reset(platform_api_conn, i)
if self.is_xcvr_resettable(request, info_dict):
self.expect(ret is True, "Failed to reset transceiver {}".format(i))
else:
self.expect(ret is False, "Resetting transceiver {} succeeded but should have failed".format(i))

# allow the I2C interface to recover post sfp reset
time.sleep(I2C_WAIT_TIME_AFTER_SFP_RESET)

# shutdown and bring up in batch so that we don't have to add delay for each interface.
intfs_changed = []
admin_up_port_list = duthost.get_admin_up_ports()
for intf in self.sfp_setup['conn_interfaces']:
if intf not in admin_up_port_list:
# skip interfaces which are not in admin up state.
continue

sfp_port_idx = self.sfp_setup['physical_port_index_map'][intf]
# skip if info_dict is not retrieved during reset, which also means reset was not performed.
if sfp_port_idx not in port_index_to_info_dict:
continue
info_dict = port_index_to_info_dict[sfp_port_idx]

# only flap interfaces where are CMIS optics,
# non-CMIS optics should stay up after sfp_reset(), no need to flap.
if "cmis_rev" in info_dict:
duthost.shutdown_interface(intf)
intfs_changed.append(intf)

time.sleep(WAIT_TIME_AFTER_INTF_SHUTDOWN)

for intf in intfs_changed:
duthost.no_shutdown_interface(intf)

_, port_up_wait_time = default_port_toggle_wait_time(duthost, len(intfs_changed))
if not wait_until(port_up_wait_time, 10, 0,
check_interface_status_of_up_ports, duthost):
self.expect(False, "Not all interfaces are up after reset")

self.assert_expectations()

def test_tx_disable(self, duthosts, enum_rand_one_per_hwsku_hostname, localhost, platform_api_conn): # noqa F811
Expand Down
2 changes: 1 addition & 1 deletion tests/platform_tests/sfp/test_sfputil.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from tests.common.utilities import skip_release, wait_until
from tests.common.fixtures.duthost_utils import shutdown_ebgp # noqa F401
from tests.common.port_toggle import default_port_toggle_wait_time
from tests.common.platform.transceiver_utils import I2C_WAIT_TIME_AFTER_SFP_RESET
from tests.common.platform.interface_utils import get_physical_port_indices

cmd_sfp_presence = "sudo sfputil show presence"
Expand All @@ -34,7 +35,6 @@
DOM_ENABLED = "enabled"
DOM_POLLING_CONFIG_VALUES = [DOM_DISABLED, DOM_ENABLED]

I2C_WAIT_TIME_AFTER_SFP_RESET = 5 # in seconds
WAIT_TIME_AFTER_LPMODE_SET = 3 # in seconds

logger = logging.getLogger(__name__)
Expand Down

0 comments on commit 6814013

Please sign in to comment.