Skip to content

Commit

Permalink
535.183.06
Browse files Browse the repository at this point in the history
  • Loading branch information
niv committed Jul 9, 2024
1 parent 4459285 commit c588c38
Show file tree
Hide file tree
Showing 18 changed files with 292 additions and 82 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

## Release 535 Entries

### [535.183.06] 2024-07-09

### [535.183.01] 2024-06-04

### [535.179] 2024-05-09
Expand Down
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# NVIDIA Linux Open GPU Kernel Module Source

This is the source release of the NVIDIA Linux open GPU kernel modules,
version 535.183.01.
version 535.183.06.


## How to Build
Expand All @@ -17,7 +17,7 @@ as root:

Note that the kernel modules built here must be used with GSP
firmware and user-space NVIDIA GPU driver components from a corresponding
535.183.01 driver release. This can be achieved by installing
535.183.06 driver release. This can be achieved by installing
the NVIDIA GPU driver from the .run file using the `--no-kernel-modules`
option. E.g.,

Expand Down Expand Up @@ -180,15 +180,15 @@ software applications.
## Compatible GPUs

The open-gpu-kernel-modules can be used on any Turing or later GPU
(see the table below). However, in the 535.183.01 release,
(see the table below). However, in the 535.183.06 release,
GeForce and Workstation support is still considered alpha-quality.

To enable use of the open kernel modules on GeForce and Workstation GPUs,
set the "NVreg_OpenRmEnableUnsupportedGpus" nvidia.ko kernel module
parameter to 1. For more details, see the NVIDIA GPU driver end user
README here:

https://us.download.nvidia.com/XFree86/Linux-x86_64/535.183.01/README/kernel_open.html
https://us.download.nvidia.com/XFree86/Linux-x86_64/535.183.06/README/kernel_open.html

In the below table, if three IDs are listed, the first is the PCI Device
ID, the second is the PCI Subsystem Vendor ID, and the third is the PCI
Expand Down
2 changes: 1 addition & 1 deletion kernel-open/Kbuild
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ EXTRA_CFLAGS += -I$(src)/common/inc
EXTRA_CFLAGS += -I$(src)
EXTRA_CFLAGS += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-error -Wno-format-extra-args
EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"535.183.01\"
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"535.183.06\"

ifneq ($(SYSSRCHOST1X),)
EXTRA_CFLAGS += -I$(SYSSRCHOST1X)
Expand Down
29 changes: 26 additions & 3 deletions kernel-open/conftest.sh
Original file line number Diff line number Diff line change
Expand Up @@ -1208,6 +1208,23 @@ compile_test() {
compile_check_conftest "$CODE" "NV_VFIO_DEVICE_OPS_HAS_BIND_IOMMUFD" "" "types"
;;

vfio_device_ops_has_detach_ioas)
#
# Determine if 'vfio_device_ops' struct has 'detach_ioas' field.
#
# Added by commit 9048c7341c4df9cae04c154a8b0f556dbe913358 ("vfio-iommufd: Add detach_ioas
# support for physical VFIO devices
#
CODE="
#include <linux/pci.h>
#include <linux/vfio.h>
int conftest_vfio_device_ops_has_detach_ioas(void) {
return offsetof(struct vfio_device_ops, detach_ioas);
}"

compile_check_conftest "$CODE" "NV_VFIO_DEVICE_OPS_HAS_DETACH_IOAS" "" "types"
;;

pci_irq_vector_helpers)
#
# Determine if pci_alloc_irq_vectors(), pci_free_irq_vectors()
Expand Down Expand Up @@ -6772,10 +6789,12 @@ case "$5" in
#
VERBOSE=$6
iommu=CONFIG_VFIO_IOMMU_TYPE1
iommufd_vfio_container=CONFIG_IOMMUFD_VFIO_CONTAINER
mdev=CONFIG_VFIO_MDEV
kvm=CONFIG_KVM_VFIO
vfio_pci_core=CONFIG_VFIO_PCI_CORE
VFIO_IOMMU_PRESENT=0
VFIO_IOMMUFD_VFIO_CONTAINER_PRESENT=0
VFIO_MDEV_PRESENT=0
KVM_PRESENT=0
VFIO_PCI_CORE_PRESENT=0
Expand All @@ -6785,6 +6804,10 @@ case "$5" in
VFIO_IOMMU_PRESENT=1
fi

if (test_configuration_option ${iommufd_vfio_container} || test_configuration_option ${iommufd_vfio_container}_MODULE); then
VFIO_IOMMUFD_VFIO_CONTAINER_PRESENT=1
fi

if (test_configuration_option ${mdev} || test_configuration_option ${mdev}_MODULE); then
VFIO_MDEV_PRESENT=1
fi
Expand All @@ -6797,7 +6820,7 @@ case "$5" in
VFIO_PCI_CORE_PRESENT=1
fi

if [ "$VFIO_IOMMU_PRESENT" != "0" ] && [ "$KVM_PRESENT" != "0" ] ; then
if ([ "$VFIO_IOMMU_PRESENT" != "0" ] || [ "$VFIO_IOMMUFD_VFIO_CONTAINER_PRESENT" != "0" ])&& [ "$KVM_PRESENT" != "0" ] ; then
# vGPU requires either MDEV or vfio-pci-core framework to be present.
if [ "$VFIO_MDEV_PRESENT" != "0" ] || [ "$VFIO_PCI_CORE_PRESENT" != "0" ]; then
exit 0
Expand All @@ -6806,8 +6829,8 @@ case "$5" in

echo "Below CONFIG options are missing on the kernel for installing";
echo "NVIDIA vGPU driver on KVM host";
if [ "$VFIO_IOMMU_PRESENT" = "0" ]; then
echo "CONFIG_VFIO_IOMMU_TYPE1";
if [ "$VFIO_IOMMU_PRESENT" = "0" ] && [ "$VFIO_IOMMUFD_VFIO_CONTAINER_PRESENT" = "0" ]; then
echo "either CONFIG_VFIO_IOMMU_TYPE1 or CONFIG_IOMMUFD_VFIO_CONTAINER";
fi

if [ "$VFIO_MDEV_PRESENT" = "0" ] && [ "$VFIO_PCI_CORE_PRESENT" = "0" ]; then
Expand Down
16 changes: 8 additions & 8 deletions src/common/inc/nvBldVer.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,18 +43,18 @@
#endif

#if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS)
#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r535/r538_67-552"
#define NV_BUILD_CHANGELIST_NUM (34280977)
#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r535/r538_67-557"
#define NV_BUILD_CHANGELIST_NUM (34462766)
#define NV_BUILD_TYPE "Official"
#define NV_BUILD_NAME "rel/gpu_drv/r535/r538_67-552"
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (34280977)
#define NV_BUILD_NAME "rel/gpu_drv/r535/r538_67-557"
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (34462766)

#else /* Windows builds */
#define NV_BUILD_BRANCH_VERSION "r538_67-1"
#define NV_BUILD_CHANGELIST_NUM (34280977)
#define NV_BUILD_BRANCH_VERSION "r538_67-2"
#define NV_BUILD_CHANGELIST_NUM (34397468)
#define NV_BUILD_TYPE "Official"
#define NV_BUILD_NAME "538.69"
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (34280977)
#define NV_BUILD_NAME "538.78"
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (34397468)
#define NV_BUILD_BRANCH_BASE_VERSION R535
#endif
// End buildmeister python edited section
Expand Down
2 changes: 1 addition & 1 deletion src/common/inc/nvUnixVersion.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS) || defined(NV_VMWARE) || defined(NV_QNX) || defined(NV_INTEGRITY) || \
(defined(RMCFG_FEATURE_PLATFORM_GSP) && RMCFG_FEATURE_PLATFORM_GSP == 1)

#define NV_VERSION_STRING "535.183.01"
#define NV_VERSION_STRING "535.183.06"

#else

Expand Down
3 changes: 2 additions & 1 deletion src/common/nvlink/interface/nvlink.h
Original file line number Diff line number Diff line change
Expand Up @@ -500,7 +500,8 @@ NvlStatus nvlink_lib_get_remote_conn_info(nvlink_link *link, nvlink_conn_info *c
*/
NvlStatus nvlink_lib_discover_and_get_remote_conn_info(nvlink_link *end,
nvlink_conn_info *conn_info,
NvU32 flags);
NvU32 flags,
NvBool bForceDiscovery);


/************************************************************************************************/
Expand Down
5 changes: 3 additions & 2 deletions src/common/nvlink/kernel/nvlink/core/nvlink_discovery.c
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,8 @@ nvlink_core_discover_and_get_remote_end
(
nvlink_link *end,
nvlink_link **remote_end,
NvU32 flags
NvU32 flags,
NvBool bForceDiscovery
)
{
nvlink_intranode_conn *conn = NULL;
Expand All @@ -67,7 +68,7 @@ nvlink_core_discover_and_get_remote_end
return;
}

if (nvlinkLibCtx.bNewEndpoints)
if (nvlinkLibCtx.bNewEndpoints || bForceDiscovery)
{
if (!_nvlink_core_all_links_initialized())
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1481,7 +1481,7 @@ _nvlink_lib_ctrl_device_discover_peer_link
(linkMode == NVLINK_LINKSTATE_SLEEP))
{
nvlink_link *remoteLink = NULL;
nvlink_core_discover_and_get_remote_end(link, &remoteLink, 0);
nvlink_core_discover_and_get_remote_end(link, &remoteLink, 0, NV_FALSE);
if (remoteLink == NULL)
{
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_INFO,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,8 @@ nvlink_lib_discover_and_get_remote_conn_info
(
nvlink_link *end,
nvlink_conn_info *conn_info,
NvU32 flags
NvU32 flags,
NvBool bForceDiscovery
)
{
NvlStatus status = NVL_SUCCESS;
Expand Down Expand Up @@ -257,7 +258,8 @@ nvlink_lib_discover_and_get_remote_conn_info
conn_info->bConnected = NV_FALSE;

// Get the remote_end of the link
nvlink_core_discover_and_get_remote_end(end, &remote_end, flags);
nvlink_core_discover_and_get_remote_end(end, &remote_end, flags,
bForceDiscovery);

if (remote_end)
{
Expand Down
3 changes: 2 additions & 1 deletion src/common/nvlink/kernel/nvlink/nvlink_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,8 @@ void nvlink_core_correlate_conn_by_token(nvlink_link *srcLink, NvU64 writeToken,
*/
void nvlink_core_discover_and_get_remote_end(nvlink_link *end,
nvlink_link **remote_end,
NvU32 flags);
NvU32 flags,
NvBool bForceDiscovery);


/************************************************************************************************/
Expand Down
4 changes: 3 additions & 1 deletion src/common/nvswitch/kernel/lr10/lr10.c
Original file line number Diff line number Diff line change
Expand Up @@ -4071,7 +4071,9 @@ nvswitch_ctrl_get_nvlink_status_lr10
}
else
{
nvlink_lib_discover_and_get_remote_conn_info(link, &conn_info, NVLINK_STATE_CHANGE_SYNC);
nvlink_lib_discover_and_get_remote_conn_info(link, &conn_info,
NVLINK_STATE_CHANGE_SYNC,
NV_FALSE);
}

// Set NVLINK per-link caps
Expand Down
89 changes: 86 additions & 3 deletions src/common/sdk/nvidia/inc/ctrl/ctrl2080/ctrl2080nvlink.h
Original file line number Diff line number Diff line change
Expand Up @@ -2971,8 +2971,91 @@ typedef struct NV2080_CTRL_NVLINK_POST_FAULT_UP_PARAMS {
NvU32 linkId;
} NV2080_CTRL_NVLINK_POST_FAULT_UP_PARAMS;

#define NV2080_CTRL_CMD_NVLINK_POST_FAULT_UP (0x20803043U) /* finn: Evaluated from "(FINN_NV20_SUBDEVICE_0_NVLINK_INTERFACE_ID << 8) | NV2080_CTRL_NVLINK_POST_FAULT_UP_PARAMS_MESSAGE_ID" */
#define NV2080_CTRL_CMD_NVLINK_POST_FAULT_UP (0x20803043U) /* finn: Evaluated from "(FINN_NV20_SUBDEVICE_0_NVLINK_INTERFACE_ID << 8) | NV2080_CTRL_NVLINK_POST_FAULT_UP_PARAMS_MESSAGE_ID" */

#define NV2080_CTRL_NVLINK_PORT_EVENT_COUNT_SIZE 64U

/*
* Structure to store port event information
*
* portEventType
* Type of port even that occurred: NVLINK_PORT_EVENT_TYPE*
*
* gpuId
* Gpu that port event occurred on
*
* linkId
* Link id that port event occurred on
*
* time
* Platform time (nsec) when event occurred
*/
typedef struct NV2080_CTRL_NVLINK_PORT_EVENT {
NvU32 portEventType;
NvU32 gpuId;
NvU32 linkId;
NV_DECLARE_ALIGNED(NvU64 time, 8);
} NV2080_CTRL_NVLINK_PORT_EVENT;

/*
* NV2080_CTRL_CMD_NVLINK_GET_PORT_EVENTS
*
* This command returns the port up and port down events that have occurred
*
* Parameters:
*
* portEventIndex [IN/OUT]
* On input: The index of the first port event at which to start reading out of the driver.
*
* On output: The index of the first port event that wasn't reported through the 'port event' array
* in this call to NV2080_CTRL_CMD_NVLINK_GET_PORT_EVENTS.
*
* nextPortEventIndex[OUT]
* The index that will be assigned to the next port event that occurs.
* Users of the GET_PORT_EVENTS control call may set 'portEventIndex' to this field on initialization
* to bypass port events that have already occurred without making multiple control calls.
*
* portEventCount [OUT]
* Number of port events returned by the call. Currently, portEventCount is limited
* by NV2080_CTRL_NVLINK_PORT_EVENT_COUNT_SIZE. In order to query all the port events, a
* client needs to keep calling the control till portEventCount is zero.
*
* bOverflow [OUT]
* True when the port event log is overflowed and no longer contains all the port
* events that have occurred, false otherwise.
*
* portEvent [OUT]
* The port event entires.
*/
#define NV2080_CTRL_NVLINK_GET_PORT_EVENTS_PARAMS_MESSAGE_ID (0x44U)

typedef struct NV2080_CTRL_NVLINK_GET_PORT_EVENTS_PARAMS {
NV_DECLARE_ALIGNED(NvU64 portEventIndex, 8);
NV_DECLARE_ALIGNED(NvU64 nextPortEventIndex, 8);
NvU32 portEventCount;
NvBool bOverflow;
NV_DECLARE_ALIGNED(NV2080_CTRL_NVLINK_PORT_EVENT portEvent[NV2080_CTRL_NVLINK_PORT_EVENT_COUNT_SIZE], 8);
} NV2080_CTRL_NVLINK_GET_PORT_EVENTS_PARAMS;

#define NV2080_CTRL_CMD_NVLINK_GET_PORT_EVENTS (0x20803044U) /* finn: Evaluated from "(FINN_NV20_SUBDEVICE_0_NVLINK_INTERFACE_ID << 8) | NV2080_CTRL_NVLINK_GET_PORT_EVENTS_PARAMS_MESSAGE_ID" */

/*
* NV2080_CTRL_CMD_NVLINK_CYCLE_LINK
*
* This command cycles a link by faulting it and then retraining the link
*
* Parameters:
*
* linkId [IN]
* The link id of the link to be cycled
*/
#define NV2080_CTRL_NVLINK_CYCLE_LINK_PARAMS_MESSAGE_ID (0x45U)

typedef struct NV2080_CTRL_NVLINK_CYCLE_LINK_PARAMS {
NvU32 linkId;
} NV2080_CTRL_NVLINK_CYCLE_LINK_PARAMS;

#define NV2080_CTRL_CMD_NVLINK_CYCLE_LINK (0x20803045U) /* finn: Evaluated from "(FINN_NV20_SUBDEVICE_0_NVLINK_INTERFACE_ID << 8) | NV2080_CTRL_NVLINK_CYCLE_LINK_PARAMS_MESSAGE_ID" */

/*
* NV2080_CTRL_CMD_NVLINK_IS_REDUCED_CONFIG
Expand All @@ -2982,13 +3065,13 @@ typedef struct NV2080_CTRL_NVLINK_POST_FAULT_UP_PARAMS {
* [out] bReducedNvlinkConfig
* Link number which the sequence should be triggered
*/
#define NV2080_CTRL_NVLINK_IS_REDUCED_CONFIG_PARAMS_MESSAGE_ID (0x44U)
#define NV2080_CTRL_NVLINK_IS_REDUCED_CONFIG_PARAMS_MESSAGE_ID (0x46U)

typedef struct NV2080_CTRL_NVLINK_IS_REDUCED_CONFIG_PARAMS {
NvBool bReducedNvlinkConfig;
} NV2080_CTRL_NVLINK_IS_REDUCED_CONFIG_PARAMS;

#define NV2080_CTRL_CMD_NVLINK_IS_REDUCED_CONFIG (0x20803044U) /* finn: Evaluated from "(FINN_NV20_SUBDEVICE_0_NVLINK_INTERFACE_ID << 8) | NV2080_CTRL_NVLINK_IS_REDUCED_CONFIG_PARAMS_MESSAGE_ID" */
#define NV2080_CTRL_CMD_NVLINK_IS_REDUCED_CONFIG (0x20803046U) /* finn: Evaluated from "(FINN_NV20_SUBDEVICE_0_NVLINK_INTERFACE_ID << 8) | NV2080_CTRL_NVLINK_IS_REDUCED_CONFIG_PARAMS_MESSAGE_ID" */


/* _ctrl2080nvlink_h_ */
Expand Down
1 change: 1 addition & 0 deletions src/nvidia/generated/g_kernel_nvlink_nvoc.h
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,7 @@ struct KernelNvlink {
NvBool PRIVATE_FIELD(bEnableSafeModeAtLoad);
NvBool PRIVATE_FIELD(bEnableAli);
NvBool PRIVATE_FIELD(bFloorSwept);
NvU32 PRIVATE_FIELD(numPortEvents);
NvBool PRIVATE_FIELD(bLinkTrainingDebugSpew);
NvBool PRIVATE_FIELD(bDisableL2Mode);
NvU32 PRIVATE_FIELD(nvlinkLinkSpeed);
Expand Down
Loading

0 comments on commit c588c38

Please sign in to comment.