diff --git a/docs/get-started/xgboost-examples/csp/databricks/databricks.md b/docs/get-started/xgboost-examples/csp/databricks/databricks.md index d56f8ca8..8bb7fe7f 100644 --- a/docs/get-started/xgboost-examples/csp/databricks/databricks.md +++ b/docs/get-started/xgboost-examples/csp/databricks/databricks.md @@ -21,7 +21,7 @@ Navigate to your home directory in the UI and select **Create** > **File** from create an `init.sh` scripts with contents: ```bash #!/bin/bash - sudo wget -O /databricks/jars/rapids-4-spark_2.12-24.08.1.jar https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/24.08.1/rapids-4-spark_2.12-24.08.1.jar + sudo wget -O /databricks/jars/rapids-4-spark_2.12-24.10.0.jar https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/24.10.0/rapids-4-spark_2.12-24.10.0.jar ``` 1. Select the Databricks Runtime Version from one of the supported runtimes specified in the Prerequisites section. @@ -68,7 +68,7 @@ create an `init.sh` scripts with contents: ```bash spark.rapids.sql.python.gpu.enabled true spark.python.daemon.module rapids.daemon_databricks - spark.executorEnv.PYTHONPATH /databricks/jars/rapids-4-spark_2.12-24.08.1.jar:/databricks/spark/python + spark.executorEnv.PYTHONPATH /databricks/jars/rapids-4-spark_2.12-24.10.0.jar:/databricks/spark/python ``` Note that since python memory pool require installing the cudf library, so you need to install cudf library in each worker nodes `pip install cudf-cu11 --extra-index-url=https://pypi.nvidia.com` or disable python memory pool diff --git a/docs/get-started/xgboost-examples/csp/databricks/init.sh b/docs/get-started/xgboost-examples/csp/databricks/init.sh index 750a0733..fc415b2d 100644 --- a/docs/get-started/xgboost-examples/csp/databricks/init.sh +++ b/docs/get-started/xgboost-examples/csp/databricks/init.sh @@ -1,7 +1,7 @@ sudo rm -f /databricks/jars/spark--maven-trees--ml--10.x--xgboost-gpu--ml.dmlc--xgboost4j-gpu_2.12--ml.dmlc__xgboost4j-gpu_2.12__1.5.2.jar sudo rm -f /databricks/jars/spark--maven-trees--ml--10.x--xgboost-gpu--ml.dmlc--xgboost4j-spark-gpu_2.12--ml.dmlc__xgboost4j-spark-gpu_2.12__1.5.2.jar -sudo wget -O /databricks/jars/rapids-4-spark_2.12-24.08.1.jar https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/24.08.1/rapids-4-spark_2.12-24.08.1.jar +sudo wget -O /databricks/jars/rapids-4-spark_2.12-24.10.0.jar https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/24.10.0/rapids-4-spark_2.12-24.10.0.jar sudo wget -O /databricks/jars/xgboost4j-gpu_2.12-1.7.1.jar https://repo1.maven.org/maven2/ml/dmlc/xgboost4j-gpu_2.12/1.7.1/xgboost4j-gpu_2.12-1.7.1.jar sudo wget -O /databricks/jars/xgboost4j-spark-gpu_2.12-1.7.1.jar https://repo1.maven.org/maven2/ml/dmlc/xgboost4j-spark-gpu_2.12/1.7.1/xgboost4j-spark-gpu_2.12-1.7.1.jar ls -ltr diff --git a/docs/get-started/xgboost-examples/on-prem-cluster/kubernetes-scala.md b/docs/get-started/xgboost-examples/on-prem-cluster/kubernetes-scala.md index 2d685ef5..a0b5fe30 100644 --- a/docs/get-started/xgboost-examples/on-prem-cluster/kubernetes-scala.md +++ b/docs/get-started/xgboost-examples/on-prem-cluster/kubernetes-scala.md @@ -6,12 +6,12 @@ the reader will be able to run a sample Apache Spark XGBoost application on NVID Prerequisites ------------- -* Apache Spark 3.1.1+ (e.g.: Spark 3.1.1) +* Apache Spark 3.2.0+ (e.g.: Spark 3.2.0) * Hardware Requirements * NVIDIA Pascal™ GPU architecture or better * Multi-node clusters with homogenous GPU configuration * Software Requirements - * Ubuntu 18.04, 20.04/CentOS7, Rocky Linux 8 + * Ubuntu 20.04, 22.04/CentOS7, Rocky Linux 8 * CUDA 11.0+ * NVIDIA driver compatible with your CUDA * NCCL 2.7.8+ @@ -40,7 +40,7 @@ export SPARK_DOCKER_IMAGE= export SPARK_DOCKER_TAG= pushd ${SPARK_HOME} -wget https://github.com/NVIDIA/spark-rapids-examples/raw/branch-24.08/dockerfile/Dockerfile +wget https://github.com/NVIDIA/spark-rapids-examples/raw/branch-24.10/dockerfile/Dockerfile # Optionally install additional jars into ${SPARK_HOME}/jars/ diff --git a/docs/get-started/xgboost-examples/on-prem-cluster/standalone-python.md b/docs/get-started/xgboost-examples/on-prem-cluster/standalone-python.md index 70de42c1..b271f03d 100644 --- a/docs/get-started/xgboost-examples/on-prem-cluster/standalone-python.md +++ b/docs/get-started/xgboost-examples/on-prem-cluster/standalone-python.md @@ -1,17 +1,17 @@ Get Started with XGBoost4J-Spark on an Apache Spark Standalone Cluster ====================================================================== -This is a getting started guide to XGBoost4J-Spark on an Apache Spark 3.1+ Standalone Cluster. +This is a getting started guide to XGBoost4J-Spark on an Apache Spark 3.2+ Standalone Cluster. At the end of this guide, the user can run a sample Apache Spark Python application that runs on NVIDIA GPUs. Prerequisites ------------- -* Apache Spark 3.1.1+ Standalone Cluster (e.g.: Spark 3.1.1) +* Apache Spark 3.2.0+ (e.g.: Spark 3.2.0) * Hardware Requirements * NVIDIA Pascal™ GPU architecture or better * Multi-node clusters with homogenous GPU configuration * Software Requirements - * Ubuntu 18.04, 20.04/CentOS7, Rocky Linux 8 + * Ubuntu 20.04, 22.04/CentOS7, Rocky Linux 8 * CUDA 11.5+ * NVIDIA driver compatible with your CUDA * NCCL 2.7.8+ @@ -31,7 +31,7 @@ In this example, we will limit the number of cores to 1, to match our dataset. Please see https://spark.apache.org/docs/latest/spark-standalone.html for more documentation regarding Standalone configuration. We use `SPARK_HOME` environment variable to point to the Apache Spark cluster. -And here are the steps to enable the GPU resources discovery for Spark 3.1+. +And here are the steps to enable the GPU resources discovery for Spark 3.2+. 1. Copy the spark config file from template diff --git a/docs/get-started/xgboost-examples/on-prem-cluster/standalone-scala.md b/docs/get-started/xgboost-examples/on-prem-cluster/standalone-scala.md index c94df408..9910d45d 100644 --- a/docs/get-started/xgboost-examples/on-prem-cluster/standalone-scala.md +++ b/docs/get-started/xgboost-examples/on-prem-cluster/standalone-scala.md @@ -1,18 +1,18 @@ Get Started with XGBoost4J-Spark on an Apache Spark Standalone Cluster ====================================================================== -This is a getting-started guide to XGBoost on an Apache Spark 3.1+ Standalone Cluster. At the end of this guide, +This is a getting-started guide to XGBoost on an Apache Spark 3.2+ Standalone Cluster. At the end of this guide, the user can run a sample Apache Spark application that runs on NVIDIA GPUs. Prerequisites ------------- -* Apache Spark 3.1.1+ Standalone Cluster (e.g.: Spark 3.1.1) +* Apache Spark 3.2.0+ Standalone Cluster (e.g.: Spark 3.2.0) * Hardware Requirements * NVIDIA Pascal™ GPU architecture or better * Multi-node clusters with homogenous GPU configuration * Software Requirements - * Ubuntu 18.04, 20.04/CentOS7, Rocky Linux 8 + * Ubuntu 20.04, 22.04/CentOS7, Rocky Linux 8 * CUDA 11.0+ * NVIDIA driver compatible with your CUDA * NCCL 2.7.8+ @@ -28,7 +28,7 @@ In this example, we will limit the number of cores to 1, to match our dataset. Please see https://spark.apache.org/docs/latest/spark-standalone.html for more documentation regarding Standalone configuration. We use `SPARK_HOME` environment variable to point to the Apache Spark cluster. -And here are steps to enable the GPU resources discovery for Spark 3.1+. +And here are steps to enable the GPU resources discovery for Spark 3.2+. 1. Copy the spark configure file from template. diff --git a/docs/get-started/xgboost-examples/on-prem-cluster/yarn-python.md b/docs/get-started/xgboost-examples/on-prem-cluster/yarn-python.md index b5565d02..89105d49 100644 --- a/docs/get-started/xgboost-examples/on-prem-cluster/yarn-python.md +++ b/docs/get-started/xgboost-examples/on-prem-cluster/yarn-python.md @@ -6,12 +6,12 @@ At the end of this guide, the reader will be able to run a sample Apache Spark P Prerequisites ------------- -* Apache Spark 3.1.1+ running on YARN supporting GPU scheduling. (e.g.: Spark 3.1.1, Hadoop-Yarn 3.3.0) +* Apache Spark 3.2.0+ running on YARN supporting GPU scheduling. (e.g.: Spark 3.2.0, Hadoop-Yarn 3.3.0) * Hardware Requirements * NVIDIA Pascal™ GPU architecture or better * Multi-node clusters with homogenous GPU configuration * Software Requirements - * Ubuntu 18.04, 20.04/CentOS7, Rocky Linux 8 + * Ubuntu 20.04, 22.04/CentOS7, Rocky Linux 8 * CUDA 11.5+ * NVIDIA driver compatible with your CUDA * NCCL 2.7.8+ diff --git a/docs/get-started/xgboost-examples/on-prem-cluster/yarn-scala.md b/docs/get-started/xgboost-examples/on-prem-cluster/yarn-scala.md index 97c60d56..f0387aaa 100644 --- a/docs/get-started/xgboost-examples/on-prem-cluster/yarn-scala.md +++ b/docs/get-started/xgboost-examples/on-prem-cluster/yarn-scala.md @@ -7,12 +7,12 @@ At the end of this guide, the reader will be able to run a sample Apache Spark a Prerequisites ------------- -* Apache Spark 3.1.1+ running on YARN supporting GPU scheduling. (e.g.: Spark 3.1.1, Hadoop-Yarn 3.3.0) +* Apache Spark 3.2.0+ running on YARN supporting GPU scheduling. (e.g.: Spark 3.2.0, Hadoop-Yarn 3.3.0) * Hardware Requirements * NVIDIA Pascal™ GPU architecture or better * Multi-node clusters with homogenous GPU configuration * Software Requirements - * Ubuntu 18.04, 20.04/CentOS7, Rocky Linux 8 + * Ubuntu 20.04, 22.04/CentOS7, Rocky Linux 8 * CUDA 11.0+ * NVIDIA driver compatible with your CUDA * NCCL 2.7.8+ diff --git a/docs/get-started/xgboost-examples/prepare-package-data/preparation-python.md b/docs/get-started/xgboost-examples/prepare-package-data/preparation-python.md index 61df8def..d8534cb1 100644 --- a/docs/get-started/xgboost-examples/prepare-package-data/preparation-python.md +++ b/docs/get-started/xgboost-examples/prepare-package-data/preparation-python.md @@ -5,7 +5,7 @@ For simplicity export the location to these jars. All examples assume the packag ### Download the jars Download the RAPIDS Accelerator for Apache Spark plugin jar - * [RAPIDS Spark Package](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/24.08.1/rapids-4-spark_2.12-24.08.1.jar) + * [RAPIDS Spark Package](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/24.10.0/rapids-4-spark_2.12-24.10.0.jar) ### Build XGBoost Python Examples diff --git a/docs/get-started/xgboost-examples/prepare-package-data/preparation-scala.md b/docs/get-started/xgboost-examples/prepare-package-data/preparation-scala.md index ba4ab38d..8cbfa0db 100644 --- a/docs/get-started/xgboost-examples/prepare-package-data/preparation-scala.md +++ b/docs/get-started/xgboost-examples/prepare-package-data/preparation-scala.md @@ -5,7 +5,7 @@ For simplicity export the location to these jars. All examples assume the packag ### Download the jars 1. Download the RAPIDS Accelerator for Apache Spark plugin jar - * [RAPIDS Spark Package](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/24.08.1/rapids-4-spark_2.12-24.08.1.jar) + * [RAPIDS Spark Package](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/24.10.0/rapids-4-spark_2.12-24.10.0.jar) ### Build XGBoost Scala Examples diff --git a/examples/ML+DL-Examples/Spark-Rapids-ML/pca/README.md b/examples/ML+DL-Examples/Spark-Rapids-ML/pca/README.md index 4b5fd567..9de395c0 100644 --- a/examples/ML+DL-Examples/Spark-Rapids-ML/pca/README.md +++ b/examples/ML+DL-Examples/Spark-Rapids-ML/pca/README.md @@ -10,7 +10,7 @@ Please refer to the Spark-Rapids-ML [README](https://github.com/NVIDIA/spark-rap ## Download RAPIDS Jar from Maven Central Download the [Spark-Rapids plugin](https://nvidia.github.io/spark-rapids/docs/download.html#download-rapids-accelerator-for-apache-spark-v24081). -For Spark-RAPIDS-ML version 24.08, download the RAPIDS jar from Maven Central: [rapids-4-spark_2.12-24.08.1.jar](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/24.08.1/rapids-4-spark_2.12-24.08.1.jar). +For Spark-RAPIDS-ML version 24.10, download the RAPIDS jar from Maven Central: [rapids-4-spark_2.12-24.10.0.jar](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/24.10.0/rapids-4-spark_2.12-24.10.0.jar). ## Running the Notebooks diff --git a/examples/ML+DL-Examples/Spark-Rapids-ML/pca/notebooks/pca.ipynb b/examples/ML+DL-Examples/Spark-Rapids-ML/pca/notebooks/pca.ipynb index 37a11238..9af1f9c3 100644 --- a/examples/ML+DL-Examples/Spark-Rapids-ML/pca/notebooks/pca.ipynb +++ b/examples/ML+DL-Examples/Spark-Rapids-ML/pca/notebooks/pca.ipynb @@ -43,7 +43,7 @@ "24/10/04 18:04:27 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable\n", "Setting default log level to \"WARN\".\n", "To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).\n", - "24/10/04 18:04:27 WARN RapidsPluginUtils: RAPIDS Accelerator 24.08.1 using cudf 24.08.0, private revision 9fac64da220ddd6bf5626bd7bd1dd74c08603eac\n", + "24/10/04 18:04:27 WARN RapidsPluginUtils: RAPIDS Accelerator 24.10.0 using cudf 24.10.0, private revision 9fac64da220ddd6bf5626bd7bd1dd74c08603eac\n", "24/10/04 18:04:27 WARN RapidsPluginUtils: RAPIDS Accelerator is enabled, to disable GPU support set `spark.rapids.sql.enabled` to false.\n", "24/10/04 18:04:31 WARN GpuDeviceManager: RMM pool is disabled since spark.rapids.memory.gpu.pooling.enabled is set to false; however, this configuration is deprecated and the behavior may change in a future release.\n" ] @@ -57,7 +57,7 @@ " import os\n", " import requests\n", "\n", - " SPARK_RAPIDS_VERSION = \"24.08.1\"\n", + " SPARK_RAPIDS_VERSION = \"24.10.0\"\n", " rapids_jar = f\"rapids-4-spark_2.12-{SPARK_RAPIDS_VERSION}.jar\"\n", " if not os.path.exists(rapids_jar):\n", " print(\"Downloading spark rapids jar\")\n", @@ -539,7 +539,7 @@ ], "metadata": { "kernelspec": { - "display_name": "rapids-24.08", + "display_name": "rapids-24.10", "language": "python", "name": "python3" }, diff --git a/examples/SQL+DF-Examples/micro-benchmarks/notebooks/micro-benchmarks-gpu.ipynb b/examples/SQL+DF-Examples/micro-benchmarks/notebooks/micro-benchmarks-gpu.ipynb index e2c2b4a6..a7f9780d 100644 --- a/examples/SQL+DF-Examples/micro-benchmarks/notebooks/micro-benchmarks-gpu.ipynb +++ b/examples/SQL+DF-Examples/micro-benchmarks/notebooks/micro-benchmarks-gpu.ipynb @@ -22,7 +22,7 @@ "import os\n", "# Change to your cluster ip:port and directories\n", "SPARK_MASTER_URL = os.getenv(\"SPARK_MASTER_URL\", \"spark:your-ip:port\")\n", - "RAPIDS_JAR = os.getenv(\"RAPIDS_JAR\", \"/your-path/rapids-4-spark_2.12-24.08.1.jar\")\n" + "RAPIDS_JAR = os.getenv(\"RAPIDS_JAR\", \"/your-path/rapids-4-spark_2.12-24.10.0.jar\")\n" ] }, { diff --git a/examples/UDF-Examples/RAPIDS-accelerated-UDFs/README.md b/examples/UDF-Examples/RAPIDS-accelerated-UDFs/README.md index 2a9fcf6c..67d9a618 100644 --- a/examples/UDF-Examples/RAPIDS-accelerated-UDFs/README.md +++ b/examples/UDF-Examples/RAPIDS-accelerated-UDFs/README.md @@ -186,7 +186,7 @@ then do the following inside the Docker container. ### Get jars from Maven Central -[rapids-4-spark_2.12-24.08.1.jar](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/24.08.1/rapids-4-spark_2.12-24.08.1.jar) +[rapids-4-spark_2.12-24.10.0.jar](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/24.10.0/rapids-4-spark_2.12-24.10.0.jar) ### Launch a local mode Spark diff --git a/examples/UDF-Examples/RAPIDS-accelerated-UDFs/pom.xml b/examples/UDF-Examples/RAPIDS-accelerated-UDFs/pom.xml index 0a238f7d..80f9c6de 100644 --- a/examples/UDF-Examples/RAPIDS-accelerated-UDFs/pom.xml +++ b/examples/UDF-Examples/RAPIDS-accelerated-UDFs/pom.xml @@ -37,7 +37,7 @@ cuda11 2.12 - 24.06.0 + 24.10.0 3.1.1 2.12.15 ${project.build.directory}/cpp-build diff --git a/examples/XGBoost-Examples/agaricus/notebooks/python/agaricus-gpu.ipynb b/examples/XGBoost-Examples/agaricus/notebooks/python/agaricus-gpu.ipynb index 7be0e60f..d997ca12 100644 --- a/examples/XGBoost-Examples/agaricus/notebooks/python/agaricus-gpu.ipynb +++ b/examples/XGBoost-Examples/agaricus/notebooks/python/agaricus-gpu.ipynb @@ -73,7 +73,7 @@ "Setting default log level to \"WARN\".\n", "To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).\n", "2022-11-30 06:57:40,550 WARN resource.ResourceUtils: The configuration of cores (exec = 2 task = 1, runnable tasks = 2) will result in wasted resources due to resource gpu limiting the number of runnable tasks per executor to: 1. Please adjust your configuration.\n", - "2022-11-30 06:57:54,195 WARN rapids.RapidsPluginUtils: RAPIDS Accelerator 24.08.1 using cudf 24.08.1.\n", + "2022-11-30 06:57:54,195 WARN rapids.RapidsPluginUtils: RAPIDS Accelerator 24.10.0 using cudf 24.10.0.\n", "2022-11-30 06:57:54,210 WARN rapids.RapidsPluginUtils: spark.rapids.sql.multiThreadedRead.numThreads is set to 20.\n", "2022-11-30 06:57:54,214 WARN rapids.RapidsPluginUtils: RAPIDS Accelerator is enabled, to disable GPU support set `spark.rapids.sql.enabled` to false.\n", "2022-11-30 06:57:54,214 WARN rapids.RapidsPluginUtils: spark.rapids.sql.explain is set to `NOT_ON_GPU`. Set it to 'NONE' to suppress the diagnostics logging about the query placement on the GPU.\n", diff --git a/examples/XGBoost-Examples/mortgage/notebooks/python/MortgageETL.ipynb b/examples/XGBoost-Examples/mortgage/notebooks/python/MortgageETL.ipynb index 02875a23..edb88183 100644 --- a/examples/XGBoost-Examples/mortgage/notebooks/python/MortgageETL.ipynb +++ b/examples/XGBoost-Examples/mortgage/notebooks/python/MortgageETL.ipynb @@ -6,10 +6,10 @@ "source": [ "## Prerequirement\n", "### 1. Download data\n", - "Dataset is derived from Fannie Mae’s [Single-Family Loan Performance Data](http://www.fanniemae.com/portal/funding-the-market/data/loan-performance-data.html) with all rights reserved by Fannie Mae. Refer to these [instructions](https://github.com/NVIDIA/spark-rapids-examples/blob/branch-24.08/docs/get-started/xgboost-examples/dataset/mortgage.md) to download the dataset.\n", + "Dataset is derived from Fannie Mae’s [Single-Family Loan Performance Data](http://www.fanniemae.com/portal/funding-the-market/data/loan-performance-data.html) with all rights reserved by Fannie Mae. Refer to these [instructions](https://github.com/NVIDIA/spark-rapids-examples/blob/branch-24.10/docs/get-started/xgboost-examples/dataset/mortgage.md) to download the dataset.\n", "\n", "### 2. Download needed jars\n", - "* [rapids-4-spark_2.12-24.08.1.jar](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/24.08.1/rapids-4-spark_2.12-24.08.1.jar)\n", + "* [rapids-4-spark_2.12-24.10.0.jar](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/24.10.0/rapids-4-spark_2.12-24.10.0.jar)\n", "\n", "\n", "### 3. Start Spark Standalone\n", @@ -17,7 +17,7 @@ "\n", "### 4. Add ENV\n", "```\n", - "$ export SPARK_JARS=rapids-4-spark_2.12-24.08.1.jar\n", + "$ export SPARK_JARS=rapids-4-spark_2.12-24.10.0.jar\n", "$ export PYSPARK_DRIVER_PYTHON=jupyter \n", "$ export PYSPARK_DRIVER_PYTHON_OPTS=notebook\n", "```\n", diff --git a/examples/XGBoost-Examples/mortgage/notebooks/python/cv-mortgage-gpu.ipynb b/examples/XGBoost-Examples/mortgage/notebooks/python/cv-mortgage-gpu.ipynb index fd152646..663efeef 100644 --- a/examples/XGBoost-Examples/mortgage/notebooks/python/cv-mortgage-gpu.ipynb +++ b/examples/XGBoost-Examples/mortgage/notebooks/python/cv-mortgage-gpu.ipynb @@ -63,7 +63,7 @@ "Setting default log level to \"WARN\".\n", "To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).\n", "2022-11-25 09:34:43,952 WARN resource.ResourceUtils: The configuration of cores (exec = 4 task = 1, runnable tasks = 4) will result in wasted resources due to resource gpu limiting the number of runnable tasks per executor to: 1. Please adjust your configuration.\n", - "2022-11-25 09:34:58,155 WARN rapids.RapidsPluginUtils: RAPIDS Accelerator 24.08.1 using cudf 24.08.1.\n", + "2022-11-25 09:34:58,155 WARN rapids.RapidsPluginUtils: RAPIDS Accelerator 24.10.0 using cudf 24.10.0.\n", "2022-11-25 09:34:58,171 WARN rapids.RapidsPluginUtils: spark.rapids.sql.multiThreadedRead.numThreads is set to 20.\n", "2022-11-25 09:34:58,175 WARN rapids.RapidsPluginUtils: RAPIDS Accelerator is enabled, to disable GPU support set `spark.rapids.sql.enabled` to false.\n", "2022-11-25 09:34:58,175 WARN rapids.RapidsPluginUtils: spark.rapids.sql.explain is set to `NOT_ON_GPU`. Set it to 'NONE' to suppress the diagnostics logging about the query placement on the GPU.\n" diff --git a/examples/XGBoost-Examples/mortgage/notebooks/python/mortgage-gpu.ipynb b/examples/XGBoost-Examples/mortgage/notebooks/python/mortgage-gpu.ipynb index f3f8ab04..e0b2ba33 100644 --- a/examples/XGBoost-Examples/mortgage/notebooks/python/mortgage-gpu.ipynb +++ b/examples/XGBoost-Examples/mortgage/notebooks/python/mortgage-gpu.ipynb @@ -84,7 +84,7 @@ "22/11/24 06:14:06 INFO org.apache.spark.SparkEnv: Registering BlockManagerMaster\n", "22/11/24 06:14:06 INFO org.apache.spark.SparkEnv: Registering BlockManagerMasterHeartbeat\n", "22/11/24 06:14:06 INFO org.apache.spark.SparkEnv: Registering OutputCommitCoordinator\n", - "22/11/24 06:14:07 WARN com.nvidia.spark.rapids.RapidsPluginUtils: RAPIDS Accelerator 24.08.1 using cudf 24.08.1.\n", + "22/11/24 06:14:07 WARN com.nvidia.spark.rapids.RapidsPluginUtils: RAPIDS Accelerator 24.10.0 using cudf 24.10.0.\n", "22/11/24 06:14:07 WARN com.nvidia.spark.rapids.RapidsPluginUtils: spark.rapids.sql.multiThreadedRead.numThreads is set to 20.\n", "22/11/24 06:14:07 WARN com.nvidia.spark.rapids.RapidsPluginUtils: RAPIDS Accelerator is enabled, to disable GPU support set `spark.rapids.sql.enabled` to false.\n", "22/11/24 06:14:07 WARN com.nvidia.spark.rapids.RapidsPluginUtils: spark.rapids.sql.explain is set to `NOT_ON_GPU`. Set it to 'NONE' to suppress the diagnostics logging about the query placement on the GPU.\n" diff --git a/examples/XGBoost-Examples/mortgage/notebooks/scala/mortgage-ETL.ipynb b/examples/XGBoost-Examples/mortgage/notebooks/scala/mortgage-ETL.ipynb index a2725240..a55d0787 100644 --- a/examples/XGBoost-Examples/mortgage/notebooks/scala/mortgage-ETL.ipynb +++ b/examples/XGBoost-Examples/mortgage/notebooks/scala/mortgage-ETL.ipynb @@ -20,14 +20,14 @@ "Refer to these [instructions](https://github.com/NVIDIA/spark-rapids-examples/blob/branch-23.12/docs/get-started/xgboost-examples/dataset/mortgage.md) to download the dataset.\n", "\n", "### 2. Download needed jars\n", - "* [rapids-4-spark_2.12-24.08.1.jar](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/24.08.1/rapids-4-spark_2.12-24.08.1.jar)\n", + "* [rapids-4-spark_2.12-24.10.0.jar](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/24.10.0/rapids-4-spark_2.12-24.10.0.jar)\n", "\n", "### 3. Start Spark Standalone\n", "Before Running the script, please setup Spark standalone mode\n", "\n", "### 4. Add ENV\n", "```\n", - "$ export SPARK_JARS=rapids-4-spark_2.12-24.08.1.jar\n", + "$ export SPARK_JARS=rapids-4-spark_2.12-24.10.0.jar\n", "\n", "```\n", "\n", diff --git a/examples/XGBoost-Examples/taxi/notebooks/python/cv-taxi-gpu.ipynb b/examples/XGBoost-Examples/taxi/notebooks/python/cv-taxi-gpu.ipynb index 35dc78f2..5a45fe46 100644 --- a/examples/XGBoost-Examples/taxi/notebooks/python/cv-taxi-gpu.ipynb +++ b/examples/XGBoost-Examples/taxi/notebooks/python/cv-taxi-gpu.ipynb @@ -62,7 +62,7 @@ "Setting default log level to \"WARN\".\n", "To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).\n", "2022-11-30 08:02:10,103 WARN resource.ResourceUtils: The configuration of cores (exec = 2 task = 1, runnable tasks = 2) will result in wasted resources due to resource gpu limiting the number of runnable tasks per executor to: 1. Please adjust your configuration.\n", - "2022-11-30 08:02:23,737 WARN rapids.RapidsPluginUtils: RAPIDS Accelerator 24.08.1 using cudf 24.08.1.\n", + "2022-11-30 08:02:23,737 WARN rapids.RapidsPluginUtils: RAPIDS Accelerator 24.10.0 using cudf 24.10.0.\n", "2022-11-30 08:02:23,752 WARN rapids.RapidsPluginUtils: spark.rapids.sql.multiThreadedRead.numThreads is set to 20.\n", "2022-11-30 08:02:23,756 WARN rapids.RapidsPluginUtils: RAPIDS Accelerator is enabled, to disable GPU support set `spark.rapids.sql.enabled` to false.\n", "2022-11-30 08:02:23,757 WARN rapids.RapidsPluginUtils: spark.rapids.sql.explain is set to `NOT_ON_GPU`. Set it to 'NONE' to suppress the diagnostics logging about the query placement on the GPU.\n", diff --git a/examples/XGBoost-Examples/taxi/notebooks/python/taxi-ETL.ipynb b/examples/XGBoost-Examples/taxi/notebooks/python/taxi-ETL.ipynb index b00765d4..f10937fe 100644 --- a/examples/XGBoost-Examples/taxi/notebooks/python/taxi-ETL.ipynb +++ b/examples/XGBoost-Examples/taxi/notebooks/python/taxi-ETL.ipynb @@ -19,14 +19,14 @@ "All data could be found at https://www1.nyc.gov/site/tlc/about/tlc-trip-record-data.page\n", "\n", "### 2. Download needed jars\n", - "* [rapids-4-spark_2.12-24.08.1.jar](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/24.08.1/rapids-4-spark_2.12-24.08.1.jar)\n", + "* [rapids-4-spark_2.12-24.10.0.jar](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/24.10.0/rapids-4-spark_2.12-24.10.0.jar)\n", "\n", "### 3. Start Spark Standalone\n", "Before running the script, please setup Spark standalone mode\n", "\n", "### 4. Add ENV\n", "```\n", - "$ export SPARK_JARS=rapids-4-spark_2.12-24.08.1.jar\n", + "$ export SPARK_JARS=rapids-4-spark_2.12-24.10.0.jar\n", "$ export PYSPARK_DRIVER_PYTHON=jupyter \n", "$ export PYSPARK_DRIVER_PYTHON_OPTS=notebook\n", "```\n", diff --git a/examples/XGBoost-Examples/taxi/notebooks/python/taxi-gpu.ipynb b/examples/XGBoost-Examples/taxi/notebooks/python/taxi-gpu.ipynb index 078d3ad5..1a1eb829 100644 --- a/examples/XGBoost-Examples/taxi/notebooks/python/taxi-gpu.ipynb +++ b/examples/XGBoost-Examples/taxi/notebooks/python/taxi-gpu.ipynb @@ -73,7 +73,7 @@ "Setting default log level to \"WARN\".\n", "To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).\n", "2022-11-30 07:51:19,480 WARN resource.ResourceUtils: The configuration of cores (exec = 2 task = 1, runnable tasks = 2) will result in wasted resources due to resource gpu limiting the number of runnable tasks per executor to: 1. Please adjust your configuration.\n", - "2022-11-30 07:51:33,277 WARN rapids.RapidsPluginUtils: RAPIDS Accelerator 24.08.1 using cudf 24.08.1.\n", + "2022-11-30 07:51:33,277 WARN rapids.RapidsPluginUtils: RAPIDS Accelerator 24.10.0 using cudf 24.10.0.\n", "2022-11-30 07:51:33,292 WARN rapids.RapidsPluginUtils: spark.rapids.sql.multiThreadedRead.numThreads is set to 20.\n", "2022-11-30 07:51:33,295 WARN rapids.RapidsPluginUtils: RAPIDS Accelerator is enabled, to disable GPU support set `spark.rapids.sql.enabled` to false.\n", "2022-11-30 07:51:33,295 WARN rapids.RapidsPluginUtils: spark.rapids.sql.explain is set to `NOT_ON_GPU`. Set it to 'NONE' to suppress the diagnostics logging about the query placement on the GPU.\n", @@ -266,7 +266,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Training takes 24.08 seconds\n" + "Training takes 24.10 seconds\n" ] }, { diff --git a/examples/XGBoost-Examples/taxi/notebooks/scala/taxi-ETL.ipynb b/examples/XGBoost-Examples/taxi/notebooks/scala/taxi-ETL.ipynb index 979d2f9a..02f38b99 100644 --- a/examples/XGBoost-Examples/taxi/notebooks/scala/taxi-ETL.ipynb +++ b/examples/XGBoost-Examples/taxi/notebooks/scala/taxi-ETL.ipynb @@ -19,14 +19,14 @@ "All data could be found at https://www1.nyc.gov/site/tlc/about/tlc-trip-record-data.page\n", "\n", "### 2. Download needed jar\n", - "* [rapids-4-spark_2.12-24.08.1.jar](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/24.08.1/rapids-4-spark_2.12-24.08.1.jar)\n", + "* [rapids-4-spark_2.12-24.10.0.jar](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/24.10.0/rapids-4-spark_2.12-24.10.0.jar)\n", "\n", "### 3. Start Spark Standalone\n", "Before running the script, please setup Spark standalone mode\n", "\n", "### 4. Add ENV\n", "```\n", - "$ export SPARK_JARS=rapids-4-spark_2.12-24.08.1.jar\n", + "$ export SPARK_JARS=rapids-4-spark_2.12-24.10.0.jar\n", "\n", "```\n", "\n", diff --git a/tools/databricks/README.md b/tools/databricks/README.md index 7691e250..540c41f6 100644 --- a/tools/databricks/README.md +++ b/tools/databricks/README.md @@ -20,4 +20,4 @@ top of the notebook. After that, select *Run all* to execute the tools for the 1. Multiple event logs must be comma-separated. - For example: `/dbfs/path/to/eventlog1,/dbfs/path/to/eventlog2` -**Latest Tools Version Supported** 24.08.0 \ No newline at end of file +**Latest Tools Version Supported** 24.10.0 \ No newline at end of file diff --git a/tools/databricks/[RAPIDS Accelerator for Apache Spark] Profiling Tool Notebook Template.ipynb b/tools/databricks/[RAPIDS Accelerator for Apache Spark] Profiling Tool Notebook Template.ipynb index 503b18ff..f59953a1 100644 --- a/tools/databricks/[RAPIDS Accelerator for Apache Spark] Profiling Tool Notebook Template.ipynb +++ b/tools/databricks/[RAPIDS Accelerator for Apache Spark] Profiling Tool Notebook Template.ipynb @@ -53,7 +53,7 @@ }, "outputs": [], "source": [ - "TOOLS_VER = \"24.08.0\"\n", + "TOOLS_VER = \"24.10.0\"\n", "print(f\"Using Tools Version: {TOOLS_VER}\")" ] }, diff --git a/tools/databricks/[RAPIDS Accelerator for Apache Spark] Qualification Tool Notebook Template.ipynb b/tools/databricks/[RAPIDS Accelerator for Apache Spark] Qualification Tool Notebook Template.ipynb index 898a4846..b27f2f91 100644 --- a/tools/databricks/[RAPIDS Accelerator for Apache Spark] Qualification Tool Notebook Template.ipynb +++ b/tools/databricks/[RAPIDS Accelerator for Apache Spark] Qualification Tool Notebook Template.ipynb @@ -49,7 +49,7 @@ }, "outputs": [], "source": [ - "TOOLS_VER = \"24.08.0\"\n", + "TOOLS_VER = \"24.10.0\"\n", "print(f\"Using Tools Version: {TOOLS_VER}\")" ] },