chore: bump Zygote version #1182

Merged
merged 13 commits on Jan 24, 2025
26 changes: 13 additions & 13 deletions .buildkite/pipeline.yml
@@ -69,19 +69,19 @@ steps:
agents:
queue: "juliagpu"

# Benchmarks
- path:
- "src/"
- "ext/"
- "test/"
- "Project.toml"
- ".buildkite/"
- "benchmarks/"
if: build.pull_request.labels includes "run benchmarks"
config:
command: "buildkite-agent pipeline upload .buildkite/benchmarks.yml"
agents:
queue: "juliagpu"
# # Benchmarks
# - path:
# - "src/"
# - "ext/"
# - "test/"
# - "Project.toml"
# - ".buildkite/"
# - "benchmarks/"
# if: build.pull_request.labels includes "run benchmarks"
# config:
# command: "buildkite-agent pipeline upload .buildkite/benchmarks.yml"
# agents:
# queue: "juliagpu"

# Documentation
- path:
58 changes: 29 additions & 29 deletions .github/workflows/Benchmark.yml
@@ -22,37 +22,37 @@ on:
- main

jobs:
benchmark:
if: ${{ !contains(github.event.head_commit.message, '[skip benchmarks]') }}
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Download Buildkite Artifacts
id: download
uses: EnricoMi/download-buildkite-artifact-action@v1
with:
buildkite_token: ${{ secrets.BUILDKITE_TOKEN }}
output_path: artifacts
# benchmark:
# if: ${{ !contains(github.event.head_commit.message, '[skip benchmarks]') }}
# runs-on: ubuntu-latest
# steps:
# - uses: actions/checkout@v4
# - name: Download Buildkite Artifacts
# id: download
# uses: EnricoMi/download-buildkite-artifact-action@v1
# with:
# buildkite_token: ${{ secrets.BUILDKITE_TOKEN }}
# output_path: artifacts

- name: Locate Benchmarks Artifact
id: locate
if: ${{ steps.download.outputs.download-state == 'success' }}
run: echo "path=$(find artifacts -type f -name combinedbenchmarks.json 2>/dev/null)" >> $GITHUB_OUTPUT
# - name: Locate Benchmarks Artifact
# id: locate
# if: ${{ steps.download.outputs.download-state == 'success' }}
# run: echo "path=$(find artifacts -type f -name combinedbenchmarks.json 2>/dev/null)" >> $GITHUB_OUTPUT

- name: Upload Benchmark Results
if: ${{ steps.locate.outputs.path != '' }}
uses: benchmark-action/github-action-benchmark@v1
with:
name: Lux Benchmarks
tool: "julia"
output-file-path: ${{ steps.locate.outputs.path }}
benchmark-data-dir-path: "benchmarks"
github-token: ${{ secrets.GITHUB_TOKEN }}
comment-always: true
summary-always: true
alert-threshold: "150%"
fail-on-alert: false
auto-push: ${{ github.event_name != 'pull_request' }}
# - name: Upload Benchmark Results
# if: ${{ steps.locate.outputs.path != '' }}
# uses: benchmark-action/github-action-benchmark@v1
# with:
# name: Lux Benchmarks
# tool: "julia"
# output-file-path: ${{ steps.locate.outputs.path }}
# benchmark-data-dir-path: "benchmarks"
# github-token: ${{ secrets.GITHUB_TOKEN }}
# comment-always: true
# summary-always: true
# alert-threshold: "150%"
# fail-on-alert: false
# auto-push: ${{ github.event_name != 'pull_request' }}

airspeed-velocity:
runs-on: ubuntu-latest
19 changes: 12 additions & 7 deletions .github/workflows/CI.yml
@@ -43,13 +43,6 @@ jobs:
- "autodiff"
- "misc"
- "reactant"
include:
- version: "1"
os: "macos-latest"
test_group: "all"
- version: "1"
os: "windows-latest"
test_group: "all"
steps:
- uses: actions/checkout@v4
- uses: julia-actions/setup-julia@v2
@@ -89,6 +82,16 @@ jobs:
downgrade:
if: ${{ !contains(github.event.head_commit.message, '[skip tests]') && github.base_ref == github.event.repository.default_branch }}
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
test_group:
- "core_layers"
- "normalize_layers"
- "recurrent_layers"
- "autodiff"
- "misc"
- "reactant"
steps:
- uses: actions/checkout@v4
- uses: julia-actions/setup-julia@v2
@@ -115,6 +118,8 @@ jobs:
dir = dirname(pathof(Lux))
include(joinpath(dir, "../test/runtests.jl"))
shell: julia --color=yes --code-coverage=user --depwarn=yes --project=test {0}
env:
LUX_TEST_GROUP: ${{ matrix.test_group }}
- uses: julia-actions/julia-processcoverage@v1
with:
directories: src,ext,lib/LuxCore/src,lib/LuxCore/ext,lib/MLDataDevices/src,lib/MLDataDevices/ext,lib/WeightInitializers/src,lib/WeightInitializers/ext,lib/LuxLib/src,lib/LuxLib/ext,lib/LuxTestUtils/src
18 changes: 7 additions & 11 deletions .github/workflows/CI_LuxLib.yml
@@ -50,16 +50,6 @@ jobs:
test_group: "misc"
blas_backend: "default"
loopvec: "false"
- version: "1"
os: macos-latest
test_group: "all"
blas_backend: "default"
loopvec: "true"
- version: "1"
os: windows-latest
test_group: "all"
blas_backend: "default"
loopvec: "true"
steps:
- uses: actions/checkout@v4
- uses: julia-actions/setup-julia@v2
@@ -103,6 +93,12 @@ jobs:
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
test_group:
- "conv"
- "dense"
- "normalization"
- "misc"
steps:
- uses: actions/checkout@v4
- uses: julia-actions/setup-julia@v2
@@ -130,7 +126,7 @@
include(joinpath(dir, "../test/runtests.jl"))
shell: julia --color=yes --code-coverage=user --depwarn=yes --project=lib/LuxLib/test {0}
env:
LUXLIB_TEST_GROUP: "all"
LUXLIB_TEST_GROUP: ${{ matrix.test_group }}
LUXLIB_BLAS_BACKEND: "default"
LUXLIB_LOAD_LOOPVEC: "true"
- uses: julia-actions/julia-processcoverage@v1
8 changes: 4 additions & 4 deletions Project.toml
@@ -1,7 +1,7 @@
name = "Lux"
uuid = "b2108857-7c20-44ae-9111-449ecde12c47"
authors = ["Avik Pal <[email protected]> and contributors"]
version = "1.5.2"
version = "1.6.0"

[deps]
ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
@@ -85,7 +85,7 @@ ArrayInterface = "7.17.1"
CUDA = "5.3.2"
ChainRulesCore = "1.25"
Compat = "4.16"
ComponentArrays = "0.15.18"
ComponentArrays = "0.15.22"
ConcreteStructs = "0.2.3"
DispatchDoctor = "0.4.12"
Enzyme = "0.13.28"
@@ -110,7 +110,7 @@ NNlib = "0.9.26"
Optimisers = "0.4.1"
Preferences = "1.4.3"
Random = "1.10"
Reactant = "0.2.13"
Reactant = "0.2.21"
Reexport = "1.2.2"
ReverseDiff = "1.15"
SIMDTypes = "0.1"
@@ -121,5 +121,5 @@ StaticArraysCore = "1.4.3"
Statistics = "1.10"
Tracker = "0.2.37"
WeightInitializers = "1"
Zygote = "0.6.70"
Zygote = "0.6.70, 0.7"
julia = "1.10"
6 changes: 3 additions & 3 deletions docs/Project.toml
@@ -34,7 +34,7 @@ Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
ADTypes = "1.10"
Adapt = "4"
ChainRulesCore = "1.25"
ComponentArrays = "0.15.18"
ComponentArrays = "0.15.22"
Documenter = "1.4"
DocumenterVitepress = "0.1.3"
Enzyme = "0.13.16"
@@ -56,11 +56,11 @@ Optimisers = "0.4.1"
Pkg = "1.10"
Printf = "1.10"
Random = "1.10"
Reactant = "0.2.11"
Reactant = "0.2.21"
StableRNGs = "1"
StaticArrays = "1"
WeightInitializers = "1"
Zygote = "0.6.70"
Zygote = "0.6.70, 0.7"
julia = "1.10"

[sources]
4 changes: 0 additions & 4 deletions docs/src/.vitepress/config.mts
@@ -365,10 +365,6 @@ export default defineConfig({
text: "Distributed Training",
link: "/manual/distributed_utils",
},
{
text: "Lux In GPU Kernels",
link: "/manual/nn_inside_gpu_kernels",
},
],
},
],
6 changes: 0 additions & 6 deletions docs/src/manual/compiling_lux_models.md
@@ -10,12 +10,6 @@ Quoting the Reactant.jl Readme:
> removed. The benefits of this approach is immediately making all such code available for
> advanced optimization with little developer effort.

!!! danger "Experimental"

Reactant compilation is a very new feature and is currently experimental. Certain models
might not be compilable yet, but we are actively working on it. Open an issue if you
encounter any problems.

```@example compile_lux_model
using Lux, Reactant, Enzyme, Random, Zygote
using Functors, Optimisers, Printf
8 changes: 6 additions & 2 deletions docs/src/manual/debugging.md
@@ -143,8 +143,12 @@ Let us define a custom backward pass to introduce some NaNs:
function CRC.rrule(::typeof(offending_layer), x)
y = offending_layer(x)
function ∇offending_layer(Δ)
Δ[1] = NaN
return NoTangent(), Δ
problematicΔ = CRC.@thunk begin
Δ = CRC.unthunk(Δ)
Δ[1] = NaN
return Δ
end
return NoTangent(), problematicΔ
end
return y, ∇offending_layer
end
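For readers skimming this hunk: the pullback now wraps its work in a thunk and calls `unthunk` on the incoming cotangent before writing the NaN, rather than mutating `Δ` directly. A minimal, self-contained sketch of the same pattern follows; `offending_layer` is a hypothetical stand-in here, since the manual's real definition sits outside this hunk:

```julia
using ChainRulesCore, Zygote
const CRC = ChainRulesCore

# Hypothetical stand-in for the manual's layer; not part of this PR.
offending_layer(x) = 2 .* x

function CRC.rrule(::typeof(offending_layer), x)
    y = offending_layer(x)
    function ∇offending_layer(Δ)
        problematicΔ = CRC.@thunk begin
            Δ = CRC.unthunk(Δ)  # materialize Δ in case it arrives as a thunk
            Δ[1] = NaN          # inject the NaN into the materialized cotangent
            return Δ
        end
        return NoTangent(), problematicΔ
    end
    return y, ∇offending_layer
end

# Zygote unthunks cotangents returned from rrules, so the NaN should show up
# in the first entry of the gradient with respect to the input.
Zygote.gradient(x -> sum(abs2, offending_layer(x)), Float32[1, 2, 3])
```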
6 changes: 0 additions & 6 deletions docs/src/manual/exporting_to_jax.md
@@ -1,11 +1,5 @@
# Exporting Lux Models to Jax (via EnzymeJAX & Reactant)

!!! danger "Experimental"

This feature is experimental and is subject to change without notice. Additionally,
this feature currently requires some manual setup for interacting with Jax, which we are
working on improving.

In this manual, we will go over how to export Lux models to StableHLO and use
[EnzymeJAX](https://github.com/EnzymeAD/Enzyme-JAX) to run integrate Lux models with
JAX. We assume that users are familiar with
20 changes: 10 additions & 10 deletions docs/src/manual/nested_autodiff.md
@@ -80,7 +80,7 @@ ps, st = Lux.setup(StableRNG(0), model)
x = randn(StableRNG(0), Float32, 2, 10)
y = randn(StableRNG(11), Float32, 2, 10)

loss_function1(model, x, ps, st, y)
loss_function1(model, x, ps, Lux.testmode(st), y)
```

So our loss function works, let's take the gradient (forward diff doesn't nest nicely here):
@@ -106,12 +106,12 @@ nothing; # hide
That's pretty good, of course you will have some error from the finite differences
calculation.

### Using Batched Jacobian for Multiple Inputs
### Using Batched Jacobian for Multiple Inputs

Notice that in this example the Jacobian `J` consists on the full matrix of derivatives of `smodel` with respect
the different inputs in `x`. In many cases, we are interested in computing the Jacobian with respect to each
input individually, avoiding the unnecessary calculation of zero entries of the Jacobian. This can be achieved with
[`batched_jacobian`](@ref) to parse the calculation of the Jacobian per each single input. Using the same example
the different inputs in `x`. In many cases, we are interested in computing the Jacobian with respect to each
input individually, avoiding the unnecessary calculation of zero entries of the Jacobian. This can be achieved with
[`batched_jacobian`](@ref) to parse the calculation of the Jacobian per each single input. Using the same example
from the previous section:

```@example nested_ad
@@ -134,9 +134,9 @@ end
loss_function_batched(model, x, ps, st, y)
```

Notice that in this last example we removed `BatchNorm()` from the neural network. This is done so outputs corresponding
to differern inputs don't have an algebraic dependency due to the batch normalization happening in the neural network.
We can now verify again the value of the Jacobian:
Notice that in this last example we removed `BatchNorm()` from the neural network. This is done so outputs corresponding
to different inputs don't have an algebraic dependency due to the batch normalization happening in the neural network.
We can now verify again the value of the Jacobian:

```@example nested_ad
∂x_fd = FiniteDiff.finite_difference_gradient(x -> loss_function_batched(model, x, ps, st, y), x)
@@ -150,8 +150,8 @@ println("∞-norm(∂ps_b - ∂ps_fd): ", norm(ComponentArray(∂ps_b) .- ∂ps_
@assert norm(ComponentArray(∂ps_b) .- ∂ps_fd, Inf) < 1e-2 # hide
```

In this example, it is important to remark that now `batched_jacobian` returns a 3D array with the Jacobian calculation
for each independent input value in `x`.
In this example, it is important to remark that now `batched_jacobian` returns a 3D array with the Jacobian calculation
for each independent input value in `x`.

## Loss Function contains Gradient Computation
