cvxgrp · healeyq3 · Jul 25, 2024 · Aug 10, 2024 · Aug 15, 2024
diff --git a/cvxpylayers/torch/cvxpylayer.py b/cvxpylayers/torch/cvxpylayer.py
@@ -283,11 +283,18 @@ def forward(ctx, *params):
                 ctx.shapes.append(A.shape)
             info['canon_time'] = time.time() - start
 
-            # compute solution and derivative function
+            # compute solution (always)
+            # and derivative function (if needed for reverse mode)
             start = time.time()
             try:
-                xs, _, _, _, ctx.DT_batch = diffcp.solve_and_derivative_batch(
-                    As, bs, cs, cone_dicts, **solver_args)
+                if any(p.requires_grad for p in params):
+                    xs, _, _, _, ctx.DT_batch = (
+                        diffcp.solve_and_derivative_batch(
+                            As, bs, cs, cone_dicts, **solver_args)
+                    )
+                else:
+                    xs, _, _ = diffcp.solve_only_batch(
+                        As, bs, cs, cone_dicts, **solver_args)
             except diffcp.SolverError as e:
                 print(
                     "Please consider re-formulating your problem so that "

diff --git a/cvxpylayers/torch/test_cvxpylayer.py b/cvxpylayers/torch/test_cvxpylayer.py
@@ -87,10 +87,9 @@ def test_least_squares(self):
 
         def lstsq(
             A,
-            b): return torch.solve(
-            (A_th.t() @ b_th).unsqueeze(1),
-            A_th.t() @ A_th +
-            torch.eye(n).double())[0]
+            b): return torch.linalg.solve(  # (A^T A + I) x = A^T b
+            A.t() @ A + torch.eye(n, dtype=torch.float64),
+            (A.t() @ b).unsqueeze(1))
         x_lstsq = lstsq(A_th, b_th)
 
         grad_A_cvxpy, grad_b_cvxpy = grad(x.sum(), [A_th, b_th])
@@ -325,10 +324,9 @@ def test_broadcasting(self):
 
         def lstsq(
             A,
-            b): return torch.solve(
-            (A.t() @ b).unsqueeze(1),
-            A.t() @ A +
-            torch.eye(n).double())[0]
+            b): return torch.linalg.solve(  # (A^T A + I) x = A^T b
+            A.t() @ A + torch.eye(n, dtype=torch.float64),
+            (A.t() @ b).unsqueeze(1))
         x_lstsq = lstsq(A_th, b_th_0)
 
         grad_A_cvxpy, grad_b_cvxpy = grad(x.sum(), [A_th, b_th])
@@ -416,6 +414,43 @@ def test_basic_gp(self):
                 "eps": 1e-12, "acceleration_lookback": 0})[0].sum(),
                 (a_tch, b_tch, c_tch), atol=1e-3, rtol=1e-3)
 
+    def test_no_grad_context(self):
+        """Outputs computed under torch.no_grad() must not track gradients."""
+        num_vars, num_rows = 2, 3
+        x = cp.Variable(num_vars)
+        A = cp.Parameter((num_rows, num_vars))
+        b = cp.Parameter(num_rows)
+        residual_norm = 0.5 * cp.pnorm(A @ x - b, p=1)
+        problem = cp.Problem(cp.Minimize(residual_norm), [x >= 0])
+        assert problem.is_dpp()
+
+        layer = CvxpyLayer(problem, parameters=[A, b], variables=[x])
+        A_tch = torch.randn(num_rows, num_vars, requires_grad=True)
+        b_tch = torch.randn(num_rows, requires_grad=True)
+
+        # Inputs require grad, but the surrounding context disables autograd.
+        with torch.no_grad():
+            (solution,) = layer(A_tch, b_tch)
+        self.assertFalse(solution.requires_grad)
+
+    def test_requires_grad_false(self):
+        """With no input requiring grad, the solution must not either."""
+        num_vars, num_rows = 2, 3
+        x = cp.Variable(num_vars)
+        A = cp.Parameter((num_rows, num_vars))
+        b = cp.Parameter(num_rows)
+        residual_norm = 0.5 * cp.pnorm(A @ x - b, p=1)
+        problem = cp.Problem(cp.Minimize(residual_norm), [x >= 0])
+        assert problem.is_dpp()
+
+        layer = CvxpyLayer(problem, parameters=[A, b], variables=[x])
+        A_tch = torch.randn(num_rows, num_vars, requires_grad=False)
+        b_tch = torch.randn(num_rows, requires_grad=False)
+
+        # Neither parameter tensor tracks gradients here.
+        (solution,) = layer(A_tch, b_tch)
+        self.assertFalse(solution.requires_grad)
+
 
 if __name__ == '__main__':
     unittest.main()