Remove dtensor
We will replace this with the work on #1267, but we have no test coverage for
that PR until we run tests against Keras 3, which is probably still about a
week away.

For now, let's just remove this usage, which is no longer needed and would
break a Keras 3 install.
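For context, a rough sketch of what the Keras-3-native replacement could look like using the `keras.distribution` API. This is an assumption-laden sketch, not the code in #1267: the variable path used as a layout key is illustrative only, `ModelParallel`'s signature has varied across Keras 3 releases, and the distribution API is currently JAX-backend only.

```python
import keras

# Assemble a 2D device mesh: the first axis is for data parallelism, the
# second for model parallelism (mirroring the old ("batch", "model") DTensor mesh).
devices = keras.distribution.list_devices()
mesh = keras.distribution.DeviceMesh(
    shape=(1, len(devices)), axis_names=("batch", "model"), devices=devices
)

# Map variable paths to sharding specs; None means replicated on that axis.
# "token_embedding/embeddings" is an illustrative key, not a verified path.
layout_map = keras.distribution.LayoutMap(mesh)
layout_map["token_embedding/embeddings"] = (None, "model")

# Activate model parallelism globally, then build the model as usual.
# Note: ModelParallel's constructor arguments differ between Keras 3 releases.
distribution = keras.distribution.ModelParallel(
    device_mesh=mesh, layout_map=layout_map, batch_dim_name="batch"
)
keras.distribution.set_distribution(distribution)
```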
mattdangerw committed Oct 9, 2023
1 parent 7b2f69b commit 5786acd
Showing 9 changed files with 0 additions and 275 deletions.
5 changes: 0 additions & 5 deletions keras_nlp/conftest.py
@@ -86,8 +86,3 @@ def pytest_collection_modifyitems(config, items):
tf.debugging.disable_traceback_filtering()
if backend_config.multi_backend():
    keras.config.disable_traceback_filtering()

# One off setup for dtensor tests.
if not backend_config.multi_backend():
    keras.backend.experimental.enable_tf_random_generator()
    keras.utils.set_random_seed(1337)
72 changes: 0 additions & 72 deletions keras_nlp/models/gpt2/gpt2_backbone.py
@@ -14,10 +14,6 @@

import copy

from tensorflow.experimental import dtensor
from tensorflow.experimental.dtensor import Layout
from tensorflow.keras.dtensor.experimental import LayoutMap

from keras_nlp.api_export import keras_nlp_export
from keras_nlp.backend import keras
from keras_nlp.layers.modeling.position_embedding import PositionEmbedding
@@ -190,71 +186,3 @@ def get_config(self):
    @classproperty
    def presets(cls):
        return copy.deepcopy(backbone_presets)

    @classmethod
    def create_layout_map(cls, mesh):
        """Create a DTensor layout map for a GPT2Backbone.

        Given a DTensor mesh describing a list of devices, this method returns a
        DTensor layout map for creating a `keras_nlp.models.GPT2Backbone`
        instance. This mapping describes how to distribute all model weights
        across multiple devices. For an overview of DTensor concepts, see
        [this guide](https://www.tensorflow.org/guide/dtensor_overview).

        Args:
            mesh: A 2D `tf.experimental.dtensor.Mesh` describing the arrangement
                of devices for running distributed computation. The
                first dimension in the mesh is expected to be for data parallel
                distribution, and the second for model parallel distribution.

        Returns:
            A `tf.keras.dtensor.experimental.LayoutMap` which contains the
            proper layout to weights mapping for the model parallel setting.

        Examples:
        ```python
        keras.backend.experimental.enable_tf_random_generator()
        keras.utils.set_random_seed(1337)

        # Update both dimensions below for a multi-device setting.
        mesh = dtensor.create_mesh([("batch", 1), ("model", 1)])
        layout_map = keras_nlp.models.GPT2Backbone.create_layout_map(mesh)

        with layout_map.scope():
            model = keras_nlp.models.GPT2Backbone.from_preset("gpt2_base_en")
        ```
        """
        # We assert the mesh is 2D, and assume the first mesh dim is for data
        # parallel and the second dim is for model parallel.
        mesh_shape = mesh.shape()
        if len(mesh_shape) != 2:
            raise ValueError(
                f"Expect to create layout based on 2D mesh, received {mesh}"
            )
        _, model_dim = mesh.dim_names
        unshard_dim = dtensor.UNSHARDED

        layout_map = LayoutMap(mesh=mesh)
        # Embedding sharding
        layout_map[r".*embeddings"] = Layout([unshard_dim, model_dim], mesh)

        # Transformer block sharding
        layout_map[r".*_(query|key|value)_dense.kernel"] = Layout(
            [unshard_dim, unshard_dim, model_dim], mesh
        )
        layout_map[r".*_(query|key|value)_dense.bias"] = Layout(
            [model_dim, unshard_dim], mesh
        )
        layout_map[r".*_feedforward_intermediate_dense.kernel"] = Layout(
            [unshard_dim, model_dim], mesh
        )
        layout_map[r".*_feedforward_intermediate_dense.bias"] = Layout(
            [model_dim], mesh
        )
        layout_map[r".*_feedforward_output_dense.kernel"] = Layout(
            [model_dim, unshard_dim], mesh
        )
        layout_map[r".*_feedforward_output_dense.bias"] = Layout(
            [unshard_dim], mesh
        )
        return layout_map
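As an aside, the regex keys above match against KerasNLP weight names. A minimal sketch like the following (not part of this diff; the constructor values, the `path`/`name` attribute, and the resulting variable names are version-dependent assumptions) can be used to inspect which weights each pattern would have matched:

```python
import re

import keras_nlp

# Build a tiny GPT2Backbone and list its weights, reporting which of the
# (now removed) layout-map patterns each weight name would have matched.
model = keras_nlp.models.GPT2Backbone(
    vocabulary_size=10,
    num_layers=2,
    num_heads=2,
    hidden_dim=8,
    intermediate_dim=16,
    max_sequence_length=5,
)
patterns = [
    r".*embeddings",
    r".*_(query|key|value)_dense.kernel",
    r".*_feedforward_intermediate_dense.kernel",
    r".*_feedforward_output_dense.kernel",
]
for weight in model.weights:
    # Keras 3 variables expose `path`; older tf.keras variables expose `name`.
    name = getattr(weight, "path", None) or weight.name
    matched = [p for p in patterns if re.match(p, name)]
    print(name, tuple(weight.shape), matched)
```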
17 changes: 0 additions & 17 deletions keras_nlp/models/gpt2/gpt2_backbone_test.py
@@ -84,20 +84,3 @@ def test_saved_model(self):
        # Check that output matches.
        restored_output = restored_model(self.input_batch)
        self.assertAllClose(model_output, restored_output)

    def test_create_layout_map(self):
        mesh = tf.experimental.dtensor.create_mesh([("batch", 1), ("model", 1)])
        with GPT2Backbone.create_layout_map(mesh).scope():
            GPT2Backbone(
                vocabulary_size=10,
                num_layers=2,
                num_heads=2,
                hidden_dim=2,
                intermediate_dim=4,
                max_sequence_length=5,
            )
        # Using DTensor enables the mlir bridge as a side effect. Eventually
        # this will be default, but for now we have compile errors with the
        # bridge elsewhere and must disable. See
        # https://github.com/keras-team/keras-nlp/issues/1001
        tf.config.experimental.disable_mlir_bridge()
36 changes: 0 additions & 36 deletions keras_nlp/models/gpt2/gpt2_causal_lm.py
@@ -325,39 +325,3 @@ def next(prompt, cache, index):
"token_ids": token_ids,
"padding_mask": padding_mask,
}

    @classmethod
    def create_layout_map(cls, mesh):
        """Create a DTensor layout map for a GPT2CausalLM.

        Given a DTensor mesh describing a list of devices, this method returns a
        DTensor layout map for creating a `keras_nlp.models.GPT2CausalLM`
        instance. This mapping describes how to distribute all model weights
        across multiple devices. For an overview of DTensor concepts, see
        [this guide](https://www.tensorflow.org/guide/dtensor_overview).

        Args:
            mesh: A 2D `tf.experimental.dtensor.Mesh` describing the arrangement
                of devices for running distributed computation. The
                first dimension in the mesh is expected to be for data parallel
                distribution, and the second for model parallel distribution.

        Returns:
            A `keras.dtensor.experimental.LayoutMap` which contains the
            proper layout to weights mapping for the model parallel setting.

        Examples:
        ```python
        keras.backend.experimental.enable_tf_random_generator()
        keras.utils.set_random_seed(1337)

        # Update both dimensions below for a multi-device setting.
        mesh = tf.experimental.dtensor.create_mesh([("batch", 1), ("model", 1)])
        layout_map = keras_nlp.models.GPT2CausalLM.create_layout_map(mesh)

        with layout_map.scope():
            gpt2_lm = keras_nlp.models.GPT2CausalLM.from_preset("gpt2_base_en")
        ```
        """
        # As this task has no new variables, we just re-use the backbone method.
        return cls.backbone_cls.create_layout_map(mesh)
10 changes: 0 additions & 10 deletions keras_nlp/models/gpt2/gpt2_causal_lm_test.py
@@ -165,13 +165,3 @@ def test_saved_model(self):
        keras.utils.set_random_seed(42)
        restored_output = restored_model.predict(self.raw_batch)
        self.assertAllClose(model_output, restored_output)

    def test_create_layout_map(self):
        mesh = tf.experimental.dtensor.create_mesh([("batch", 1), ("model", 1)])
        with GPT2CausalLM.create_layout_map(mesh).scope():
            GPT2CausalLM(backbone=self.backbone)
        # Using DTensor enables the mlir bridge as a side effect. Eventually
        # this will be default, but for now we have compile errors with the
        # bridge elsewhere and must disable. See
        # https://github.com/keras-team/keras-nlp/issues/1001
        tf.config.experimental.disable_mlir_bridge()
72 changes: 0 additions & 72 deletions keras_nlp/models/opt/opt_backbone.py
@@ -14,10 +14,6 @@

import copy

from tensorflow.experimental import dtensor
from tensorflow.experimental.dtensor import Layout
from tensorflow.keras.dtensor.experimental import LayoutMap

from keras_nlp.api_export import keras_nlp_export
from keras_nlp.backend import keras
from keras_nlp.layers.modeling.token_and_position_embedding import (
@@ -168,71 +164,3 @@ def get_config(self):
    @classproperty
    def presets(cls):
        return copy.deepcopy(backbone_presets)

    @classmethod
    def create_layout_map(cls, mesh):
        """Create a DTensor layout map for an OPTBackbone.

        Given a DTensor mesh describing a list of devices, this method returns a
        DTensor layout map for creating a `keras_nlp.models.OPTBackbone`
        instance. This mapping describes how to distribute all model weights
        across multiple devices. For an overview of DTensor concepts, see
        [this guide](https://www.tensorflow.org/guide/dtensor_overview).

        Args:
            mesh: A 2D `tf.experimental.dtensor.Mesh` describing the arrangement
                of devices for running distributed computation. The
                first dimension in the mesh is expected to be for data parallel
                distribution, and the second for model parallel distribution.

        Returns:
            A `tf.keras.dtensor.experimental.LayoutMap` which contains the
            proper layout to weights mapping for the model parallel setting.

        Examples:
        ```python
        keras.backend.experimental.enable_tf_random_generator()
        keras.utils.set_random_seed(1337)

        # Update both dimensions below for a multi-device setting.
        mesh = dtensor.create_mesh([("batch", 1), ("model", 1)])
        layout_map = keras_nlp.models.OPTBackbone.create_layout_map(mesh)

        with layout_map.scope():
            model = keras_nlp.models.OPTBackbone.from_preset("opt_125m_en")
        ```
        """
        # We assert the mesh is 2D, and assume the first mesh dim is for data
        # parallel and the second dim is for model parallel.
        mesh_shape = mesh.shape()
        if len(mesh_shape) != 2:
            raise ValueError(
                f"Expect to create layout based on 2D mesh, received {mesh}"
            )
        _, model_dim = mesh.dim_names
        unshard_dim = dtensor.UNSHARDED

        layout_map = LayoutMap(mesh=mesh)
        # Embedding sharding
        layout_map[r".*embeddings"] = Layout([unshard_dim, model_dim], mesh)

        # Transformer block sharding
        layout_map[r".*_(query|key|value)_dense.kernel"] = Layout(
            [unshard_dim, unshard_dim, model_dim], mesh
        )
        layout_map[r".*_(query|key|value)_dense.bias"] = Layout(
            [model_dim, unshard_dim], mesh
        )
        layout_map[r".*_feedforward_intermediate_dense.kernel"] = Layout(
            [unshard_dim, model_dim], mesh
        )
        layout_map[r".*_feedforward_intermediate_dense.bias"] = Layout(
            [model_dim], mesh
        )
        layout_map[r".*_feedforward_output_dense.kernel"] = Layout(
            [model_dim, unshard_dim], mesh
        )
        layout_map[r".*_feedforward_output_dense.bias"] = Layout(
            [unshard_dim], mesh
        )
        return layout_map
17 changes: 0 additions & 17 deletions keras_nlp/models/opt/opt_backbone_test.py
@@ -84,20 +84,3 @@ def test_saved_model(self):
        # Check that output matches.
        restored_output = restored_model(self.input_batch)
        self.assertAllClose(model_output, restored_output)

    def test_create_layout_map(self):
        mesh = tf.experimental.dtensor.create_mesh([("batch", 1), ("model", 1)])
        with OPTBackbone.create_layout_map(mesh).scope():
            OPTBackbone(
                vocabulary_size=10,
                num_layers=2,
                num_heads=2,
                hidden_dim=2,
                intermediate_dim=4,
                max_sequence_length=5,
            )
        # Using DTensor enables the mlir bridge as a side effect. Eventually
        # this will be default, but for now we have compile errors with the
        # bridge elsewhere and must disable. See
        # https://github.com/keras-team/keras-nlp/issues/1001
        tf.config.experimental.disable_mlir_bridge()
36 changes: 0 additions & 36 deletions keras_nlp/models/opt/opt_causal_lm.py
@@ -321,39 +321,3 @@ def next(prompt, cache, index):
"token_ids": token_ids,
"padding_mask": padding_mask,
}

    @classmethod
    def create_layout_map(cls, mesh):
        """Create a DTensor layout map for an OPTCausalLM.

        Given a DTensor mesh describing a list of devices, this method returns a
        DTensor layout map for creating a `keras_nlp.models.OPTCausalLM`
        instance. This mapping describes how to distribute all model weights
        across multiple devices. For an overview of DTensor concepts, see
        [this guide](https://www.tensorflow.org/guide/dtensor_overview).

        Args:
            mesh: A 2D `tf.experimental.dtensor.Mesh` describing the arrangement
                of devices for running distributed computation. The
                first dimension in the mesh is expected to be for data parallel
                distribution, and the second for model parallel distribution.

        Returns:
            A `tf.keras.dtensor.experimental.LayoutMap` which contains the
            proper layout to weights mapping for the model parallel setting.

        Examples:
        ```python
        keras.backend.experimental.enable_tf_random_generator()
        keras.utils.set_random_seed(1337)

        # Update both dimensions below for a multi-device setting.
        mesh = tf.experimental.dtensor.create_mesh([("batch", 1), ("model", 1)])
        layout_map = keras_nlp.models.OPTCausalLM.create_layout_map(mesh)

        with layout_map.scope():
            opt_lm = keras_nlp.models.OPTCausalLM.from_preset("opt_125m_en")
        ```
        """
        # As this task has no new variables, we just re-use the backbone method.
        return cls.backbone_cls.create_layout_map(mesh)
10 changes: 0 additions & 10 deletions keras_nlp/models/opt/opt_causal_lm_test.py
@@ -171,13 +171,3 @@ def test_saved_model(self):
        keras.utils.set_random_seed(42)
        restored_output = restored_model.predict(self.raw_batch)
        self.assertAllClose(model_output, restored_output)

    def test_create_layout_map(self):
        mesh = tf.experimental.dtensor.create_mesh([("batch", 1), ("model", 1)])
        with OPTCausalLM.create_layout_map(mesh).scope():
            OPTCausalLM(backbone=self.backbone)
        # Using DTensor enables the mlir bridge as a side effect. Eventually
        # this will be default, but for now we have compile errors with the
        # bridge elsewhere and must disable. See
        # https://github.com/keras-team/keras-nlp/issues/1001
        tf.config.experimental.disable_mlir_bridge()
