
Refactor Directory Structure and Update CI/CD Pipelines #52

Merged: 11 commits, May 9, 2024
39 changes: 0 additions & 39 deletions .flake8

This file was deleted.

3 changes: 1 addition & 2 deletions .github/workflows/cla.yml
@@ -1,4 +1,3 @@

name: "cla-bot"
on:
issue_comment:
@@ -25,7 +24,7 @@ jobs:
allowlist: user1,bot*
remote-organization-name: mlcommons
remote-repository-name: systems

#below are the optional inputs - If the optional inputs are not given, then default values will be taken
#remote-organization-name: enter the remote organization name where the signatures should be stored (Default is storing the signatures in the same repository)
#remote-repository-name: enter the remote repository name where the signatures should be stored (Default is storing the signatures in the same repository)
32 changes: 0 additions & 32 deletions .github/workflows/main.yml

This file was deleted.

3 changes: 0 additions & 3 deletions .github/workflows/python_lint.yml
@@ -21,6 +21,3 @@ jobs:

- name: Run ruff
run: ruff format --check --diff .

- name: Run Pyre Check
run: pyre check
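
With Pyre and its configuration removed, ruff's format check is the only lint step left in this workflow. A minimal local mirror of that CI step (a sketch, assuming ruff is installed and on PATH):

    import subprocess

    def run_lint() -> int:
        """Invoke the same check as the remaining python_lint.yml step."""
        return subprocess.run(["ruff", "format", "--check", "--diff", "."]).returncode

    if __name__ == "__main__":
        raise SystemExit(run_lint())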
12 changes: 0 additions & 12 deletions .pyre_configuration

This file was deleted.

36 changes: 20 additions & 16 deletions pyproject.toml
@@ -19,32 +19,36 @@ Documentation = "https://github.com/mlcommons/chakra/README.md"
Repository = "https://github.com/mlcommons/chakra.git"

[tool.setuptools.package-dir]
"chakra.et_def" = "et_def"
"chakra.et_converter" = "et_converter"
"chakra.et_visualizer" = "et_visualizer"
"chakra.timeline_visualizer" = "timeline_visualizer"
"chakra.et_generator" = "utils/et_generator"
"chakra.et_jsonizer" = "utils/et_jsonizer"
"chakra.trace_link" = "trace_link"
"chakra.third_party.utils" = "third_party/utils"
"chakra.schema.protobuf" = "schema/protobuf"
"chakra.src.converter" = "src/converter"
"chakra.src.generator" = "src/generator"
"chakra.src.jsonizer" = "src/jsonizer"
"chakra.src.third_party" = "src/third_party"
"chakra.src.timeline_visualizer" = "src/timeline_visualizer"
"chakra.src.trace_link" = "src/trace_link"
"chakra.src.visualizer" = "src/visualizer"

[tool.setuptools.package-data]
"chakra.et_def" = ["et_def_pb2.pyi", "et_def.proto"]
"chakra.schema.protobuf" = ["et_def.proto"]

[project.scripts]
chakra_converter = "chakra.et_converter.et_converter:main"
chakra_visualizer = "chakra.et_visualizer.et_visualizer:main"
chakra_timeline_visualizer = "chakra.et_timeline_visualizer.et_timeline_visualizer:main"
chakra_generator = "chakra.et_generator.et_generator:main"
chakra_jsonizer = "chakra.et_jsonizer.et_jsonizer:main"
chakra_trace_link = "chakra.trace_link.trace_link:main"
chakra_converter = "chakra.src.converter.converter:main"
chakra_generator = "chakra.src.generator.generator:main"
chakra_jsonizer = "chakra.src.jsonizer.jsonizer:main"
chakra_timeline_visualizer = "chakra.src.timeline_visualizer.timeline_visualizer:main"
chakra_trace_link = "chakra.src.trace_link.trace_link:main"
chakra_visualizer = "chakra.src.visualizer.visualizer:main"
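
Each console script binds a command name to a module:function pair, so chakra_converter now resolves to main() in src/converter/converter.py. The installed mapping can be inspected via importlib.metadata (a sketch, assuming Python 3.10+ for the group keyword):

    from importlib.metadata import entry_points

    for ep in entry_points(group="console_scripts"):
        if ep.name.startswith("chakra_"):
            print(ep.name, "->", ep.value)  # e.g. chakra_converter -> chakra.src.converter.converter:main
            main = ep.load()                # imports the target module and returns its main()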

[tool.ruff]
target-version = "py39"
line-length = 120
exclude = [
"src/converter/text_converter.py",
"src/third_party/utils/protolib.py",
]

[tool.ruff.lint]
select = ["I", "B", "E", "F", "SIM", "W", "C90", "EXE"]
select = ["I", "B", "E", "F", "SIM", "W", "C90"]

[tool.ruff.format]
indent-style = "space"
File renamed without changes.
4 changes: 2 additions & 2 deletions setup.cfg
@@ -1,5 +1,5 @@
[build_grpc]
proto_files = et_def.proto
grpc_files = et_def.proto
proto_path = et_def
output_path = et_def
proto_path = schema/protobuf/
output_path = schema/protobuf/
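
The build_grpc options now point the proto compiler at schema/protobuf/ instead of et_def/. Roughly the invocation this configures (a sketch, assuming grpcio-tools is installed; the actual command is assembled by setup.py's build_grpc step):

    from grpc_tools import protoc

    protoc.main([
        "grpc_tools.protoc",
        "--proto_path=schema/protobuf/",
        "--python_out=schema/protobuf/",
        "--grpc_python_out=schema/protobuf/",
        "schema/protobuf/et_def.proto",
    ])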
1 change: 1 addition & 0 deletions setup.py
@@ -1,4 +1,5 @@
from distutils.command.build import build

from setuptools import setup


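The new distutils import suggests setup.py chains a custom step onto the standard build, the usual way to hook in the build_grpc proto compilation. The common pattern looks like this (an assumed illustration; the actual subclass is outside this hunk):

    from distutils.command.build import build
    from setuptools import setup

    class BuildWithProtos(build):
        def run(self):
            # generate schema/protobuf/et_def_pb2.py here, then run the normal build
            super().run()

    setup(cmdclass={"build": BuildWithProtos})
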
14 changes: 6 additions & 8 deletions et_converter/et_converter.py → src/converter/converter.py
@@ -4,10 +4,10 @@
import logging
import sys
import traceback

from logging import FileHandler
from .text2chakra_converter import Text2ChakraConverter
from .pytorch2chakra_converter import PyTorch2ChakraConverter

from .pytorch_converter import PyTorchConverter
from .text_converter import TextConverter


def get_logger(log_filename: str) -> logging.Logger:
@@ -52,17 +52,15 @@ def main() -> None:

try:
if args.input_type == "Text":
converter = Text2ChakraConverter(
args.input_filename, args.output_filename, args.num_npus, args.num_passes, logger
)
converter = TextConverter(args.input_filename, args.output_filename, args.num_npus, args.num_passes, logger)
converter.convert()
elif args.input_type == "PyTorch":
converter = PyTorch2ChakraConverter(args.input_filename, args.output_filename, logger)
converter = PyTorchConverter(args.input_filename, args.output_filename, logger)
converter.convert()
else:
logger.error(f"{args.input_type} unsupported")
sys.exit(1)
except Exception as e:
except Exception:
traceback.print_exc()
logger.debug(traceback.format_exc())
sys.exit(1)
@@ -2,26 +2,32 @@

import json
import logging
from typing import Dict, List, Optional, Tuple, Set
from typing import Dict, List, Optional, Set, Tuple

from .pytorch_node import PyTorchNodeType, PyTorchNode
from chakra.third_party.utils.protolib import encodeMessage as encode_message
from chakra.et_def.et_def_pb2 import (
GlobalMetadata,
NodeType as ChakraNodeType,
Node as ChakraNode,
AttributeProto as ChakraAttr,
COMP_NODE,
COMM_COLL_NODE,
ALL_REDUCE,
from ...schema.protobuf.et_def_pb2 import (
ALL_GATHER,
BROADCAST,
ALL_REDUCE,
ALL_TO_ALL,
BROADCAST,
COMM_COLL_NODE,
COMP_NODE,
REDUCE_SCATTER,
GlobalMetadata,
)
from ...schema.protobuf.et_def_pb2 import (
AttributeProto as ChakraAttr,
)
from ...schema.protobuf.et_def_pb2 import (
Node as ChakraNode,
)
from ...schema.protobuf.et_def_pb2 import (
NodeType as ChakraNodeType,
)
from ..third_party.utils.protolib import encodeMessage as encode_message
from .pytorch_node import PyTorchNode, PyTorchNodeType


class PyTorch2ChakraConverter:
class PyTorchConverter:
"""
Converter class for transforming PyTorch execution traces into Chakra format.

@@ -94,7 +100,7 @@ def convert(self) -> None:

self.open_chakra_execution_trace()

for pytorch_nid, pytorch_node in self.pytorch_nodes.items():
for _, pytorch_node in self.pytorch_nodes.items():
if (pytorch_node.get_op_type() == PyTorchNodeType.CPU_OP) or (
pytorch_node.get_op_type() == PyTorchNodeType.LABEL
):
@@ -148,7 +154,7 @@ def load_pytorch_execution_traces(self) -> None:
self._parse_and_instantiate_nodes(pytorch_et_data)
except IOError as e:
self.logger.error(f"Error opening file {self.input_filename}: {e}")
raise Exception(f"Could not open file {self.input_filename}")
raise Exception(f"Could not open file {self.input_filename}") from e
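
Raising with `from e` (ruff's B904 fix) keeps the original IOError attached as __cause__, so the traceback reports both errors instead of hiding the root cause. A self-contained illustration with a hypothetical path:

    def load(path: str) -> bytes:
        try:
            with open(path, "rb") as f:
                return f.read()
        except IOError as e:
            raise Exception(f"Could not open file {path}") from e

    try:
        load("missing_trace.json")
    except Exception as exc:
        print(type(exc.__cause__).__name__)  # FileNotFoundError, preserved by `from e`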

def _parse_and_instantiate_nodes(self, pytorch_et_data: Dict) -> None:
"""
@@ -173,7 +179,7 @@ def _parse_and_instantiate_nodes(self, pytorch_et_data: Dict) -> None:
}
self._establish_parent_child_relationships(pytorch_node_objects)

def _establish_parent_child_relationships(self, pytorch_node_objects: Dict[int, PyTorchNode]) -> None:
def _establish_parent_child_relationships(self, pytorch_node_objects: Dict[int, PyTorchNode]) -> None: # noqa: C901
"""
Establishes parent-child relationships among PyTorch nodes and counts
the node types.
@@ -241,11 +247,11 @@ def open_chakra_execution_trace(self) -> None:
"""
self.logger.info(f"Opening Chakra execution trace file: {self.output_filename}")
try:
self.chakra_et = open(self.output_filename, "wb")
self.chakra_et = open(self.output_filename, "wb") # noqa: SIM115
except IOError as e:
err_msg = f"Error opening file {self.output_filename}: {e}"
self.logger.error(err_msg)
raise Exception(err_msg)
raise Exception(err_msg) from e

def convert_to_chakra_node(self, pytorch_node: PyTorchNode) -> ChakraNode:
"""
@@ -296,9 +302,11 @@ def get_chakra_node_type_from_pytorch_node(self, pytorch_node: PyTorchNode) -> ChakraNodeType:
Returns:
int: The corresponding Chakra node type.
"""
if pytorch_node.is_gpu_op() and ("ncclKernel" in pytorch_node.name or "ncclDevKernel" in pytorch_node.name):
return COMM_COLL_NODE
elif ("c10d::" in pytorch_node.name) or ("nccl:" in pytorch_node.name):
if (
pytorch_node.is_gpu_op()
and ("ncclKernel" in pytorch_node.name or "ncclDevKernel" in pytorch_node.name)
or (("c10d::" in pytorch_node.name) or ("nccl:" in pytorch_node.name))
):
return COMM_COLL_NODE
return COMP_NODE
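
The collapsed condition leans on operator precedence: `and` binds tighter than `or`, so it groups as (GPU op with an nccl kernel name) or (a c10d::/nccl: prefixed name), exactly the two branches that previously returned COMM_COLL_NODE. A quick equivalence check on hypothetical inputs:

    def is_coll(is_gpu: bool, name: str) -> bool:
        return (
            is_gpu and ("ncclKernel" in name or "ncclDevKernel" in name)
            or ("c10d::" in name or "nccl:" in name)
        )

    assert is_coll(True, "ncclDevKernel_AllReduce")  # GPU collective kernel
    assert is_coll(False, "c10d::allreduce_")        # CPU-side collective op
    assert not is_coll(False, "aten::matmul")        # plain compute -> COMP_NODE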

@@ -354,7 +362,7 @@ def is_root_node(self, node):
if node.name in ["[pytorch|profiler|execution_graph|thread]", "[pytorch|profiler|execution_trace|thread]"]:
return True

def convert_ctrl_dep_to_data_dep(self, chakra_node: ChakraNode) -> None:
def convert_ctrl_dep_to_data_dep(self, chakra_node: ChakraNode) -> None: # noqa: C901
"""
Traverses nodes based on control dependencies (parent nodes) and encodes
data dependencies appropriately. This method is crucial for converting the
@@ -422,13 +430,11 @@ def convert_ctrl_dep_to_data_dep(self, chakra_node: ChakraNode) -> None:
node_op_type = pytorch_node.get_op_type()

if node_op_type == PyTorchNodeType.GPU_OP:
if last_visited_any:
if last_visited_any.id not in current_node.data_deps:
current_node.data_deps.append(last_visited_any.id)
self.logger.debug(
f"GPU Node ID {current_node.id} now has a data "
f"dependency on Node ID {last_visited_any.id}"
)
if (last_visited_any) and (last_visited_any.id not in current_node.data_deps):
current_node.data_deps.append(last_visited_any.id)
self.logger.debug(
f"GPU Node ID {current_node.id} now has a data " f"dependency on Node ID {last_visited_any.id}"
)

last_visited_any = last_visited_non_gpu
else:
@@ -440,13 +446,12 @@ def convert_ctrl_dep_to_data_dep(self, chakra_node: ChakraNode) -> None:
f"CPU Node ID {current_node.id} now has an inter-thread data dependency on Node ID {id}"
)

if last_visited_non_gpu:
if last_visited_non_gpu.id not in current_node.data_deps:
current_node.data_deps.append(last_visited_non_gpu.id)
self.logger.debug(
f"CPU Node ID {current_node.id} now has a data "
f"dependency on non-GPU Node ID {last_visited_non_gpu.id}"
)
if (last_visited_non_gpu) and (last_visited_non_gpu.id not in current_node.data_deps):
current_node.data_deps.append(last_visited_non_gpu.id)
self.logger.debug(
f"CPU Node ID {current_node.id} now has a data "
f"dependency on non-GPU Node ID {last_visited_non_gpu.id}"
)
last_visited_non_gpu = current_node
last_visited_any = current_node
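
Both dependency updates above are instances of ruff's SIM102 fix: a nested `if` collapses into a single conjoined condition with identical behavior. The same pattern on hypothetical stand-ins:

    from dataclasses import dataclass, field
    from typing import List, Optional

    @dataclass
    class Node:
        id: int
        data_deps: List[int] = field(default_factory=list)

    def add_dep(current: Node, last_visited: Optional[Node]) -> None:
        # was: if last_visited: / if last_visited.id not in current.data_deps:
        if last_visited and last_visited.id not in current.data_deps:
            current.data_deps.append(last_visited.id)

    n = Node(id=2)
    add_dep(n, Node(id=1))
    add_dep(n, Node(id=1))  # duplicate is skipped; the dependency is recorded once
    assert n.data_deps == [1]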

12 changes: 5 additions & 7 deletions et_converter/pytorch_node.py → src/converter/pytorch_node.py
@@ -73,9 +73,7 @@ def parse_data(self, node_data: Dict[str, Any]) -> None:
"""
supported_versions = ["1.0.2-chakra.0.0.4", "1.0.3-chakra.0.0.4"]
if self.schema in supported_versions:
if self.schema == "1.0.2-chakra.0.0.4":
self._parse_data_1_0_3_chakra_0_0_4(node_data)
elif self.schema == "1.0.3-chakra.0.0.4":
if self.schema == "1.0.2-chakra.0.0.4" or self.schema == "1.0.3-chakra.0.0.4":
self._parse_data_1_0_3_chakra_0_0_4(node_data)
else:
raise ValueError(
Expand All @@ -96,8 +94,8 @@ def _parse_data_1_0_3_chakra_0_0_4(self, node_data: Dict[str, Any]) -> None:
self.exclusive_dur = node_data.get("exclusive_dur", 0)
self.ts = node_data.get("ts")
self.inter_thread_dep = node_data.get("inter_thread_dep")
self.cat = node_data.get("cat", None)
self.stream = node_data.get("stream", None)
self.cat = node_data.get("cat")
self.stream = node_data.get("stream")

for attr in node_data.get("attrs", []):
setattr(self, attr["name"], attr["value"])
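
Dropping the explicit None default is safe because dict.get already returns None for a missing key:

    record = {"name": "aten::matmul"}  # hypothetical trace entry
    assert record.get("cat") is None   # identical to record.get("cat", None)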
@@ -235,5 +233,5 @@ def get_data_type_size(data_type: str) -> int:
}
try:
return data_type_size_map[data_type]
except KeyError:
raise ValueError(f"Unsupported data type: {data_type}")
except KeyError as e:
raise ValueError(f"Unsupported data type: {data_type}") from e
File renamed without changes.