Remove dtensor #1268

Merged. 1 commit merged on Oct 9, 2023.
5 changes: 0 additions & 5 deletions keras_nlp/conftest.py
@@ -86,8 +86,3 @@ def pytest_collection_modifyitems(config, items):
tf.debugging.disable_traceback_filtering()
if backend_config.multi_backend():
keras.config.disable_traceback_filtering()

# One off setup for dtensor tests.
if not backend_config.multi_backend():
keras.backend.experimental.enable_tf_random_generator()
keras.utils.set_random_seed(1337)
72 changes: 0 additions & 72 deletions keras_nlp/models/gpt2/gpt2_backbone.py
@@ -14,10 +14,6 @@

import copy

from tensorflow.experimental import dtensor
from tensorflow.experimental.dtensor import Layout
from tensorflow.keras.dtensor.experimental import LayoutMap

from keras_nlp.api_export import keras_nlp_export
from keras_nlp.backend import keras
from keras_nlp.layers.modeling.position_embedding import PositionEmbedding
@@ -190,71 +186,3 @@ def get_config(self):
@classproperty
def presets(cls):
return copy.deepcopy(backbone_presets)

@classmethod
def create_layout_map(cls, mesh):
"""Create a DTensor layout map for a GPT2Backbone.

Given a DTensor mesh describing a list of devices, this method returns a
DTensor layout map for creating a `keras_nlp.models.GPT2Backbone`
instance. This mapping describes how to distribute all model weights
across multiple devices. For an overview of DTensor concepts, see
[this guide](https://www.tensorflow.org/guide/dtensor_overview).

Args:
mesh: A 2D `tf.experimental.dtensor.Mesh` describing the arrangement
of devices for running distributed computation. The
first dimension in the mesh is expected to be for data parallel
distribution, and the second for model parallel distribution.

Returns:
A `tf.keras.dtensor.experimental.LayoutMap` which contains the
proper layout to weights mapping for the model parallel setting.

Examples:
```python
keras.backend.experimental.enable_tf_random_generator()
keras.utils.set_random_seed(1337)

# Update both dimensions below for a multi-device setting.
mesh = dtensor.create_mesh([("batch", 1), ("model", 1)])
layout_map = keras_nlp.models.GPT2Backbone.create_layout_map(mesh)

with layout_map.scope():
model = keras_nlp.models.GPT2Backbone.from_preset("gpt2_base_en")
```
"""
# We assert the mesh is 2D, and assume the first mesh dim is for data
# parallel and the second dim is for model parallel.
mesh_shape = mesh.shape()
if len(mesh_shape) != 2:
raise ValueError(
f"Expect to create layout based on 2D mesh, received {mesh}"
)
_, model_dim = mesh.dim_names
unshard_dim = dtensor.UNSHARDED

layout_map = LayoutMap(mesh=mesh)
# Embedding sharding
layout_map[r".*embeddings"] = Layout([unshard_dim, model_dim], mesh)

# Transformer block sharding
layout_map[r".*_(query|key|value)_dense.kernel"] = Layout(
[unshard_dim, unshard_dim, model_dim], mesh
)
layout_map[r".*_(query|key|value)_dense.bias"] = Layout(
[model_dim, unshard_dim], mesh
)
layout_map[r".*_feedforward_intermediate_dense.kernel"] = Layout(
[unshard_dim, model_dim], mesh
)
layout_map[r".*_feedforward_intermediate_dense.bias"] = Layout(
[model_dim], mesh
)
layout_map[r".*_feedforward_output_dense.kernel"] = Layout(
[model_dim, unshard_dim], mesh
)
layout_map[r".*_feedforward_output_dense.bias"] = Layout(
[unshard_dim], mesh
)
return layout_map
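Note (not part of this diff): with the DTensor helpers removed, an equivalent model-parallel sharding can be described with the backend-agnostic `keras.distribution` API in Keras 3. The sketch below is an illustrative assumption rather than anything this PR adds; the device count and the variable-path regexes are placeholders, not the patterns KerasNLP ships.

```python
import keras

# Illustrative sketch only, assuming Keras 3 with a backend that supports
# `keras.distribution` (e.g. JAX); not the removed DTensor code path.
devices = keras.distribution.list_devices()

# 1 x N mesh: the first axis is for data parallelism, the second for model
# parallelism, mirroring the ("batch", "model") mesh in the removed helpers.
mesh = keras.distribution.DeviceMesh(
    shape=(1, len(devices)), axis_names=("batch", "model"), devices=devices
)

layout_map = keras.distribution.LayoutMap(mesh)
# Placeholder regexes; real keys depend on the variable paths of the model
# being sharded.
layout_map[".*embeddings"] = (None, "model")
layout_map[".*(query|key|value)_dense.*kernel"] = (None, None, "model")
layout_map[".*feedforward_intermediate_dense.*kernel"] = (None, "model")
layout_map[".*feedforward_output_dense.*kernel"] = ("model", None)
```

Each value tuple names the mesh axis that a weight dimension is sharded over; `None` leaves a dimension replicated, playing the role of `dtensor.UNSHARDED` in the removed code.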
17 changes: 0 additions & 17 deletions keras_nlp/models/gpt2/gpt2_backbone_test.py
@@ -84,20 +84,3 @@ def test_saved_model(self):
# Check that output matches.
restored_output = restored_model(self.input_batch)
self.assertAllClose(model_output, restored_output)

def test_create_layout_map(self):
mesh = tf.experimental.dtensor.create_mesh([("batch", 1), ("model", 1)])
with GPT2Backbone.create_layout_map(mesh).scope():
GPT2Backbone(
vocabulary_size=10,
num_layers=2,
num_heads=2,
hidden_dim=2,
intermediate_dim=4,
max_sequence_length=5,
)
# Using DTensor enables the mlir bridge as a side effect. Eventually
# this will be default, but for now we have compile errors with the
# bridge elsewhere and must disable. See
# https://github.com/keras-team/keras-nlp/issues/1001
tf.config.experimental.disable_mlir_bridge()
36 changes: 0 additions & 36 deletions keras_nlp/models/gpt2/gpt2_causal_lm.py
@@ -325,39 +325,3 @@ def next(prompt, cache, index):
"token_ids": token_ids,
"padding_mask": padding_mask,
}

@classmethod
def create_layout_map(cls, mesh):
"""Create a DTensor layout map for a GPT2CausalLM.

Given a DTensor mesh describing a list of devices, this method returns a
DTensor layout map for creating a `keras_nlp.models.GPT2CausalLM`
instance. This mapping describes how to distribute all model weights
across multiple devices. For an overview of DTensor concepts, see
[this guide](https://www.tensorflow.org/guide/dtensor_overview).

Args:
mesh: A 2D `tf.experimental.dtensor.Mesh` describing the arrangement
of devices for running distributed computation. The
first dimension in the mesh is expected to be for data parallel
distribution, and the second for model parallel distribution.

Returns:
A `keras.dtensor.experimental.LayoutMap` which contains the
proper layout to weights mapping for the model parallel setting.

Examples:
```python
keras.backend.experimental.enable_tf_random_generator()
keras.utils.set_random_seed(1337)

# Update both dimensions below for a multi-device setting.
mesh = tf.experimental.dtensor.create_mesh([("batch", 1), ("model", 1)])
layout_map = keras_nlp.models.GPT2CausalLM.create_layout_map(mesh)

with layout_map.scope():
gpt2_lm = keras_nlp.models.GPT2CausalLM.from_preset("gpt2_base_en")
```
"""
# As this task has no new variables, we just re-use the backbone method.
return cls.backbone_cls.create_layout_map(mesh)
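Note (again only a hedged sketch, not part of this change): a task model such as `GPT2CausalLM` could be built under a `ModelParallel` distribution instead of `layout_map.scope()`. The `ModelParallel` arguments shown are assumptions about the Keras 3 `keras.distribution` API.

```python
import keras
import keras_nlp

# Illustrative sketch, assuming Keras 3's `keras.distribution` API; the
# layout_map would carry sharding rules like those in the earlier sketch.
devices = keras.distribution.list_devices()
mesh = keras.distribution.DeviceMesh(
    shape=(1, len(devices)), axis_names=("batch", "model"), devices=devices
)
layout_map = keras.distribution.LayoutMap(mesh)

# Activate the distribution before constructing the model, analogous to
# building inside `layout_map.scope()` in the removed DTensor path.
keras.distribution.set_distribution(
    keras.distribution.ModelParallel(
        device_mesh=mesh, layout_map=layout_map, batch_dim_name="batch"
    )
)
gpt2_lm = keras_nlp.models.GPT2CausalLM.from_preset("gpt2_base_en")
```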
10 changes: 0 additions & 10 deletions keras_nlp/models/gpt2/gpt2_causal_lm_test.py
@@ -165,13 +165,3 @@ def test_saved_model(self):
keras.utils.set_random_seed(42)
restored_output = restored_model.predict(self.raw_batch)
self.assertAllClose(model_output, restored_output)

def test_create_layout_map(self):
mesh = tf.experimental.dtensor.create_mesh([("batch", 1), ("model", 1)])
with GPT2CausalLM.create_layout_map(mesh).scope():
GPT2CausalLM(backbone=self.backbone)
# Using DTensor enables the mlir bridge as a side effect. Eventually
# this will be default, but for now we have compile errors with the
# bridge elsewhere and must disable. See
# https://github.com/keras-team/keras-nlp/issues/1001
tf.config.experimental.disable_mlir_bridge()
72 changes: 0 additions & 72 deletions keras_nlp/models/opt/opt_backbone.py
@@ -14,10 +14,6 @@

import copy

from tensorflow.experimental import dtensor
from tensorflow.experimental.dtensor import Layout
from tensorflow.keras.dtensor.experimental import LayoutMap

from keras_nlp.api_export import keras_nlp_export
from keras_nlp.backend import keras
from keras_nlp.layers.modeling.token_and_position_embedding import (
@@ -168,71 +164,3 @@ def get_config(self):
@classproperty
def presets(cls):
return copy.deepcopy(backbone_presets)

@classmethod
def create_layout_map(cls, mesh):
"""Create a DTensor layout map for an OPTBackbone.

Given a DTensor mesh describing a list of devices, this method returns a
DTensor layout map for creating a `keras_nlp.models.OPTBackbone`
instance. This mapping describes how to distribute all model weights
across multiple devices. For an overview of DTensor concepts, see
[this guide](https://www.tensorflow.org/guide/dtensor_overview).

Args:
mesh: A 2D `tf.experimental.dtensor.Mesh` describing the arrangement
of devices for running distributed computation. The
first dimension in the mesh is expected to be for data parallel
distribution, and the second for model parallel distribution.

Returns:
A `tf.keras.dtensor.experimental.LayoutMap` which contains the
proper layout to weights mapping for the model parallel setting.

Examples:
```python
keras.backend.experimental.enable_tf_random_generator()
keras.utils.set_random_seed(1337)

# Update both dimensions below for a multi-device setting.
mesh = dtensor.create_mesh([("batch", 1), ("model", 1)])
layout_map = keras_nlp.models.OPTBackbone.create_layout_map(mesh)

with layout_map.scope():
model = keras_nlp.models.OPTBackbone.from_preset("opt_125m_en")
```
"""
# We assert the mesh is 2D, and assume the first mesh dim is for data
# parallel and the second dim is for model parallel.
mesh_shape = mesh.shape()
if len(mesh_shape) != 2:
raise ValueError(
f"Expect to create layout based on 2D mesh, received {mesh}"
)
_, model_dim = mesh.dim_names
unshard_dim = dtensor.UNSHARDED

layout_map = LayoutMap(mesh=mesh)
# Embedding sharding
layout_map[r".*embeddings"] = Layout([unshard_dim, model_dim], mesh)

# Transformer block sharding
layout_map[r".*_(query|key|value)_dense.kernel"] = Layout(
[unshard_dim, unshard_dim, model_dim], mesh
)
layout_map[r".*_(query|key|value)_dense.bias"] = Layout(
[model_dim, unshard_dim], mesh
)
layout_map[r".*_feedforward_intermediate_dense.kernel"] = Layout(
[unshard_dim, model_dim], mesh
)
layout_map[r".*_feedforward_intermediate_dense.bias"] = Layout(
[model_dim], mesh
)
layout_map[r".*_feedforward_output_dense.kernel"] = Layout(
[model_dim, unshard_dim], mesh
)
layout_map[r".*_feedforward_output_dense.bias"] = Layout(
[unshard_dim], mesh
)
return layout_map
17 changes: 0 additions & 17 deletions keras_nlp/models/opt/opt_backbone_test.py
@@ -84,20 +84,3 @@ def test_saved_model(self):
# Check that output matches.
restored_output = restored_model(self.input_batch)
self.assertAllClose(model_output, restored_output)

def test_create_layout_map(self):
mesh = tf.experimental.dtensor.create_mesh([("batch", 1), ("model", 1)])
with OPTBackbone.create_layout_map(mesh).scope():
OPTBackbone(
vocabulary_size=10,
num_layers=2,
num_heads=2,
hidden_dim=2,
intermediate_dim=4,
max_sequence_length=5,
)
# Using DTensor enables the mlir bridge as a side effect. Eventually
# this will be default, but for now we have compile errors with the
# bridge elsewhere and must disable. See
# https://github.com/keras-team/keras-nlp/issues/1001
tf.config.experimental.disable_mlir_bridge()
36 changes: 0 additions & 36 deletions keras_nlp/models/opt/opt_causal_lm.py
@@ -321,39 +321,3 @@ def next(prompt, cache, index):
"token_ids": token_ids,
"padding_mask": padding_mask,
}

@classmethod
def create_layout_map(cls, mesh):
"""Create a DTensor layout map for an OPTCausalLM.

Given a DTensor mesh describing a list of devices, this method returns a
DTensor layout map for creating a `keras_nlp.models.OPTCausalLM`
instance. This mapping describes how to distribute all model weights
across multiple devices. For an overview of DTensor concepts, see
[this guide](https://www.tensorflow.org/guide/dtensor_overview).

Args:
mesh: A 2D `tf.experimental.dtensor.Mesh` describing the arrangement
of devices for running distributed computation. The
first dimension in the mesh is expected to be for data parallel
distribution, and the second for model parallel distribution.

Returns:
A `tf.keras.dtensor.experimental.LayoutMap` which contains the
proper layout to weights mapping for the model parallel setting.

Examples:
```python
keras.backend.experimental.enable_tf_random_generator()
keras.utils.set_random_seed(1337)

# Update both dimensions below for a multi-device setting.
mesh = tf.experimental.dtensor.create_mesh([("batch", 1), ("model", 1)])
layout_map = keras_nlp.models.OPTCausalLM.create_layout_map(mesh)

with layout_map.scope():
opt_lm = keras_nlp.models.OPTCausalLM.from_preset("opt_125m_en")
```
"""
# As this task has no new variables, we just re-use the backbone method.
return cls.backbone_cls.create_layout_map(mesh)
10 changes: 0 additions & 10 deletions keras_nlp/models/opt/opt_causal_lm_test.py
@@ -171,13 +171,3 @@ def test_saved_model(self):
keras.utils.set_random_seed(42)
restored_output = restored_model.predict(self.raw_batch)
self.assertAllClose(model_output, restored_output)

def test_create_layout_map(self):
mesh = tf.experimental.dtensor.create_mesh([("batch", 1), ("model", 1)])
with OPTCausalLM.create_layout_map(mesh).scope():
OPTCausalLM(backbone=self.backbone)
# Using DTensor enables the mlir bridge as a side effect. Eventually
# this will be default, but for now we have compile errors with the
# bridge elsewhere and must disable. See
# https://github.com/keras-team/keras-nlp/issues/1001
tf.config.experimental.disable_mlir_bridge()