[SYSTEMDS-3694] Python NN Sequence and layer interface

This commit: - Adds a Layer interface for the Python API. - Changes the Affine and ReLU classes to extend this interface. - Fixes some small formatting issues in the modified classes. - Adds a Sequential primitive to the nn Python API. It is able to combine multiple nn layers into one sequential module. - Fixes the Python MultiReturn so that the outputs of the instance can be properly accessed. - Adds the backward pass to the Sequential primitive. - Adds variations of the Sequential tests involving MultiReturns. - Tests whether the input gradient is set correctly on the backward pass and fixes a bug where this was not the case in the Affine layer. - Adds tests to verify that the layer gets updated correctly during the forward and backward pass. AMLS project SoSe'24 Closes #2025
apache · Jul 1, 2024 · 8e1e53b · 8e1e53b
1 parent 8f2a18a
commit 8e1e53b
Show file tree

Hide file tree

Showing 8 changed files with 572 additions and 44 deletions.
diff --git a/src/main/python/systemds/operator/nn/affine.py b/src/main/python/systemds/operator/nn/affine.py
@@ -18,33 +18,24 @@
 # under the License.
 #
 # -------------------------------------------------------------
-import os
-
 from systemds.context import SystemDSContext
-from systemds.operator import Matrix, Source, MultiReturn
-from systemds.utils.helpers import get_path_to_script_layers
+from systemds.operator import Matrix, MultiReturn
+from systemds.operator.nn.layer import Layer
 
 
-class Affine:
-    _source: Source = None
+class Affine(Layer):
     weight: Matrix
     bias: Matrix
 
-    def __new__(cls, *args, **kwargs):
-        return super().__new__(cls)
-
     def __init__(self, sds_context: SystemDSContext, d, m, seed=-1):
         """
         sds_context: The systemdsContext to construct the layer inside of
         d: The number of features that are input to the affine layer
         m: The number of neurons that are contained in the layer, 
             and the number of features output
         """
-        Affine._create_source(sds_context)
-
-        # bypassing overload limitation in python
-        self.forward = self._instance_forward
-        self.backward = self._instance_backward
+        super().__init__(sds_context, 'affine.dml')
+        self._X = None
 
         # init weight and bias
         self.weight = Matrix(sds_context, '')
@@ -64,7 +55,7 @@ def forward(X: Matrix, W: Matrix, b: Matrix):
         b: The bias added in the output.
         return out: An output matrix.
         """
-        Affine._create_source(X.sds_context)
+        Affine._create_source(X.sds_context, "affine.dml")
         return Affine._source.forward(X, W, b)
 
     @staticmethod
@@ -77,7 +68,7 @@ def backward(dout:Matrix, X: Matrix, W: Matrix, b: Matrix):
         return dX, dW, db: The gradients of: input X, weights and bias.
         """
         sds = X.sds_context
-        Affine._create_source(sds)
+        Affine._create_source(sds, "affine.dml")
         params_dict = {'dout': dout, 'X': X, 'W': W, 'b': b}
         dX = Matrix(sds, '')
         dW = Matrix(sds, '')
@@ -104,11 +95,6 @@ def _instance_backward(self, dout: Matrix, X: Matrix):
         X: The input to this layer.
         return dX, dW,db: gradient of input, weights and bias, respectively
         """
-        return Affine.backward(dout, X, self.weight, self.bias)
-
-    @staticmethod
-    def _create_source(sds: SystemDSContext):
-        if Affine._source is None or Affine._source.sds_context != sds:
-            path = get_path_to_script_layers()
-            path = os.path.join(path, "affine.dml")
-            Affine._source = sds.source(path, "affine")
+        gradients = Affine.backward(dout, X, self.weight, self.bias)
+        self._X = gradients[0]
+        return gradients
diff --git a/src/main/python/systemds/operator/nn/layer.py b/src/main/python/systemds/operator/nn/layer.py
@@ -0,0 +1,69 @@
+# -------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# -------------------------------------------------------------
+import os
+
+from systemds.context import SystemDSContext
+from systemds.operator import Source
+from systemds.utils.helpers import get_path_to_script_layers
+
+
+class Layer:
+    """
+    Interface for neural network layers
+    """
+
+    _source: Source = None
+
+    def __init__(self, sds_context: SystemDSContext = None, dml_script: str = None):
+        if sds_context is not None and dml_script is not None:
+            self.__class__._create_source(sds_context, dml_script)
+
+        # Python has no method overloading: shadow the static forward/backward on the instance
+        self.forward = self._instance_forward
+        self.backward = self._instance_backward
+
+    @classmethod
+    def _create_source(cls, sds_context: SystemDSContext, dml_script: str):
+        """
+        Create SystemDS source
+        :param sds_context: SystemDS context
+        :param dml_script: DML script inside /scripts/nn/layers/
+        :return:
+        """
+        if cls._source is None or cls._source.sds_context != sds_context:
+            script_path = get_path_to_script_layers()
+            path = os.path.join(script_path, dml_script)
+            name = dml_script.split(".")[0]
+            cls._source = sds_context.source(path, name)
+
+    def _instance_forward(self, *args):
+        raise NotImplementedError
+
+    def _instance_backward(self, *args):
+        raise NotImplementedError
+
+    @staticmethod
+    def forward(*args):
+        raise NotImplementedError
+
+    @staticmethod
+    def backward(*args):
+        raise NotImplementedError
diff --git a/src/main/python/systemds/operator/nn/relu.py b/src/main/python/systemds/operator/nn/relu.py
@@ -18,28 +18,24 @@
 # under the License.
 #
 # -------------------------------------------------------------
-import os.path
-
 from systemds.context import SystemDSContext
 from systemds.operator import Matrix, Source
-from systemds.utils.helpers import get_path_to_script_layers
+from systemds.operator.nn.layer import Layer
 
 
-class ReLU:
+class ReLU(Layer):
     _source: Source = None
 
-    def __init__(self, sds: SystemDSContext):
-        ReLU._create_source(sds)
-        self.forward = self._instance_forward
-        self.backward = self._instance_backward
+    def __init__(self, sds_context: SystemDSContext):
+        super().__init__(sds_context, "relu.dml")
 
     @staticmethod
     def forward(X: Matrix):
         """
         X: input matrix
         return out: output matrix
         """
-        ReLU._create_source(X.sds_context)
+        ReLU._create_source(X.sds_context, "relu.dml")
         return ReLU._source.forward(X)
 
     @staticmethod
@@ -49,7 +45,7 @@ def backward(dout: Matrix, X: Matrix):
         X: input matrix
         return dX: gradient of input
         """
-        ReLU._create_source(dout.sds_context)
+        ReLU._create_source(dout.sds_context, "relu.dml")
         return ReLU._source.backward(dout, X)
 
     def _instance_forward(self, X: Matrix):
@@ -58,11 +54,3 @@ def _instance_forward(self, X: Matrix):
 
     def _instance_backward(self, dout: Matrix, X: Matrix):
         return ReLU.backward(dout, X)
-
-    @staticmethod
-    def _create_source(sds: SystemDSContext):
-        if ReLU._source is None or ReLU._source.sds_context != sds:
-            path = get_path_to_script_layers()
-            path = os.path.join(path, "relu.dml")
-            ReLU._source = sds.source(path, "relu")
-
diff --git a/src/main/python/systemds/operator/nn/sequential.py b/src/main/python/systemds/operator/nn/sequential.py
@@ -0,0 +1,97 @@
+# -------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# -------------------------------------------------------------
+from systemds.operator import MultiReturn
+from systemds.operator.nn.layer import Layer
+
+
+class Sequential(Layer):
+    def __init__(self, *args):
+        super().__init__()
+
+        self.layers = []
+        if len(args) == 1 and isinstance(args[0], list):
+            self.layers = args[0]
+        else:
+            self.layers = list(args)
+
+    def __len__(self):
+        return len(self.layers)
+
+    def __getitem__(self, idx):
+        return self.layers[idx]
+
+    def __setitem__(self, idx, value):
+        self.layers[idx] = value
+
+    def __delitem__(self, idx):
+        del self.layers[idx]
+
+    def __iter__(self):
+        return iter(self.layers)
+
+    def __reversed__(self):
+        return reversed(self.layers)
+
+    def push(self, layer: Layer):
+        """
+        Add layer
+        :param layer: Layer
+        :return:
+        """
+        self.layers.append(layer)
+
+    def pop(self):
+        """
+        Remove last layer
+        :return: Layer
+        """
+        return self.layers.pop()
+
+    def _instance_forward(self, X):
+        """
+        Forward pass
+        :param X: Input matrix
+        :return: output matrix
+        """
+        out = X
+        for layer in self:
+            out = layer.forward(out)
+
+            # if MultiReturn, take only output matrix
+            if isinstance(out, MultiReturn):
+                out = out[0]
+        return out
+
+    def _instance_backward(self, dout, X):
+        """
+        Backward pass
+        :param dout: gradient of output, passed from the upstream
+        :param X: input matrix
+        :return: gradient of the input
+        """
+        dx = dout
+        for layer in reversed(self):
+            dx = layer.backward(dx, X)
+
+            # if MultiReturn, take only gradient of input
+            if isinstance(dx, MultiReturn):
+                dx = dx[0]
+        return dx
diff --git a/src/main/python/systemds/operator/nodes/multi_return.py b/src/main/python/systemds/operator/nodes/multi_return.py
@@ -47,7 +47,7 @@ def __init__(self, sds_context, operation,
                          named_input_nodes, OutputType.MULTI_RETURN, False)
 
     def __getitem__(self, key):
-        self._outputs[key]
+        return self._outputs[key]
 
     def code_line(self, var_name: str, unnamed_input_vars: Sequence[str],
                   named_input_vars: Dict[str, str]) -> str:

diff --git a/src/main/python/tests/nn/test_affine.py b/src/main/python/tests/nn/test_affine.py
@@ -77,6 +77,7 @@ def test_forward(self):
         out = affine.forward(Xm).compute()
         self.assertEqual(len(out), 5)
         self.assertEqual(len(out[0]), 6)
+        assert_almost_equal(affine._X.compute(), Xm.compute())
 
         # test static method
         out = Affine.forward(Xm, Wm, bm).compute()
@@ -91,10 +92,13 @@ def test_backward(self):
 
         # test class method
         affine = Affine(self.sds, dim, m, 10)
-        [dx, dw, db] = affine.backward(doutm, Xm).compute()
+        gradients = affine.backward(doutm, Xm)
+        intermediate = affine._X.compute()
+        [dx, dw, db] = gradients.compute()
         assert len(dx) == 5 and len(dx[0]) == 6
         assert len(dw) == 6 and len(dx[0]) == 6
         assert len(db) == 1 and len(db[0]) == 6
+        assert_almost_equal(intermediate, dx)
 
         # test static method
         res = Affine.backward(doutm, Xm, Wm, bm).compute()