Skip to content

Commit

Permalink
Enable baseline comparison between workloads from the same path (#447)
Browse files Browse the repository at this point in the history
* Enable baseline comparison between workloads from the same path

Signed-off-by: zichguan-amd <[email protected]>

* Formatting

Signed-off-by: zichguan-amd <[email protected]>

* Change test to verify baseline works with same path

Signed-off-by: zichguan-amd <[email protected]>

* Add three-way comparison test case

Signed-off-by: zichguan-amd <[email protected]>

---------

Signed-off-by: zichguan-amd <[email protected]>
  • Loading branch information
zichguan-amd authored Oct 24, 2024
1 parent 5f3c0f0 commit 13a06f5
Show file tree
Hide file tree
Showing 31 changed files with 492 additions and 41 deletions.
25 changes: 8 additions & 17 deletions src/omniperf_analyze/analysis_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ def initalize_runs(self, normalization_filter=None):

self.load_options(normalization_filter)

for d in self.__args.path:
for i, d in enumerate(self.__args.path):
w = schema.Workload()
w.sys_info = file_io.load_sys_info(Path(d[0], "sysinfo.csv"))
arch = w.sys_info.iloc[0]["gpu_arch"]
Expand All @@ -161,7 +161,7 @@ def initalize_runs(self, normalization_filter=None):
w.avail_ips = w.sys_info["ip_blocks"].item().split("|")
w.dfs = copy.deepcopy(self._arch_configs[arch].dfs)
w.dfs_type = self._arch_configs[arch].dfs_type
self._runs[d[0]] = w
self._runs[i] = w

return self._runs

Expand All @@ -184,15 +184,6 @@ def sanitize(self):
# validate profiling data
is_workload_empty(dir[0])

# no using same paths
occurances = set()
for dir in self.__args.path:
dir = dir[0]
if dir in occurances:
console_error("You cannot provide the same path twice.")
else:
occurances.add(dir)

# ----------------------------------------------------
# Required methods to be implemented by child classes
# ----------------------------------------------------
Expand All @@ -211,20 +202,20 @@ def pre_processing(self):

# set filters
if self.__args.gpu_kernel:
for d, gk in zip(self.__args.path, self.__args.gpu_kernel):
self._runs[d[0]].filter_kernel_ids = gk
for i, gk in enumerate(self.__args.gpu_kernel):
self._runs[i].filter_kernel_ids = gk
if self.__args.gpu_id:
if len(self.__args.gpu_id) == 1 and len(self.__args.path) != 1:
for i in range(len(self.__args.path) - 1):
self.__args.gpu_id.extend(self.__args.gpu_id)
for d, gi in zip(self.__args.path, self.__args.gpu_id):
self._runs[d[0]].filter_gpu_ids = gi
for i, gi in enumerate(self.__args.gpu_id):
self._runs[i].filter_gpu_ids = gi
if self.__args.gpu_dispatch_id:
if len(self.__args.gpu_dispatch_id) == 1 and len(self.__args.path) != 1:
for i in range(len(self.__args.path) - 1):
self.__args.gpu_dispatch_id.extend(self.__args.gpu_dispatch_id)
for d, gd in zip(self.__args.path, self.__args.gpu_dispatch_id):
self._runs[d[0]].filter_dispatch_ids = gd
for i, gd in enumerate(self.__args.gpu_dispatch_id):
self._runs[i].filter_dispatch_ids = gd

@abstractmethod
def run_analysis(self):
Expand Down
22 changes: 8 additions & 14 deletions src/omniperf_analyze/analysis_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,27 +38,25 @@ def pre_processing(self):
super().pre_processing()
if self.get_args().random_port:
console_error("--gui flag is required to enable --random-port")
for d in self.get_args().path:
for i, d in enumerate(self.get_args().path):
file_io.create_df_kernel_top_stats(
raw_data_dir=d[0],
filter_gpu_ids=self._runs[d[0]].filter_gpu_ids,
filter_dispatch_ids=self._runs[d[0]].filter_dispatch_ids,
filter_gpu_ids=self._runs[i].filter_gpu_ids,
filter_dispatch_ids=self._runs[i].filter_dispatch_ids,
time_unit=self.get_args().time_unit,
max_stat_num=self.get_args().max_stat_num,
kernel_verbose=self.get_args().kernel_verbose,
)
# create 'mega dataframe'
self._runs[d[0]].raw_pmc = file_io.create_df_pmc(
self._runs[i].raw_pmc = file_io.create_df_pmc(
d[0], self.get_args().kernel_verbose, self.get_args().verbose
)
# demangle and overwrite original 'Kernel_Name'
kernel_name_shortener(
self._runs[d[0]].raw_pmc, self.get_args().kernel_verbose
)
kernel_name_shortener(self._runs[i].raw_pmc, self.get_args().kernel_verbose)

# create the loaded table
parser.load_table_data(
workload=self._runs[d[0]],
workload=self._runs[i],
dir=d[0],
is_gui=False,
debug=self.get_args().debug,
Expand All @@ -73,17 +71,13 @@ def run_analysis(self):
tty.show_kernel_stats(
self.get_args(),
self._runs,
self._arch_configs[
self._runs[self.get_args().path[0][0]].sys_info.iloc[0]["gpu_arch"]
],
self._arch_configs[self._runs[0].sys_info.iloc[0]["gpu_arch"]],
self._output,
)
else:
tty.show_all(
self.get_args(),
self._runs,
self._arch_configs[
self._runs[self.get_args().path[0][0]].sys_info.iloc[0]["gpu_arch"]
],
self._arch_configs[self._runs[0].sys_info.iloc[0]["gpu_arch"]],
self._output,
)
16 changes: 8 additions & 8 deletions src/omniperf_analyze/analysis_webui.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,20 +274,20 @@ def pre_processing(self):
args = self.get_args()
file_io.create_df_kernel_top_stats(
raw_data_dir=self.dest_dir,
filter_gpu_ids=self._runs[self.dest_dir].filter_gpu_ids,
filter_dispatch_ids=self._runs[self.dest_dir].filter_dispatch_ids,
filter_gpu_ids=self._runs[0].filter_gpu_ids,
filter_dispatch_ids=self._runs[0].filter_dispatch_ids,
time_unit=args.time_unit,
max_stat_num=args.max_stat_num,
kernel_verbose=self.get_args().kernel_verbose,
)
# create 'mega dataframe'
self._runs[self.dest_dir].raw_pmc = file_io.create_df_pmc(
self._runs[0].raw_pmc = file_io.create_df_pmc(
self.dest_dir, self.get_args().kernel_verbose, args.verbose
)
# create the loaded kernel stats
parser.load_kernel_top(self._runs[self.dest_dir], self.dest_dir)
parser.load_kernel_top(self._runs[0], self.dest_dir)
# set architecture
self.arch = self._runs[self.dest_dir].sys_info.iloc[0]["gpu_arch"]
self.arch = self._runs[0].sys_info.iloc[0]["gpu_arch"]

else:
console_error(
Expand All @@ -300,9 +300,9 @@ def run_analysis(self):
super().run_analysis()
args = self.get_args()
input_filters = {
"kernel": self._runs[self.dest_dir].filter_kernel_ids,
"gpu": self._runs[self.dest_dir].filter_gpu_ids,
"dispatch": self._runs[self.dest_dir].filter_dispatch_ids,
"kernel": self._runs[0].filter_kernel_ids,
"gpu": self._runs[0].filter_gpu_ids,
"dispatch": self._runs[0].filter_dispatch_ids,
"normalization": args.normal_unit,
"top_n": args.max_stat_num,
}
Expand Down
50 changes: 48 additions & 2 deletions tests/test_analyze_commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -1136,7 +1136,7 @@ def test_baseline():
],
):
omniperf.main()
assert e.value.code == 1
assert e.value.code == 0

with pytest.raises(SystemExit) as e:
with patch(
Expand All @@ -1151,7 +1151,53 @@ def test_baseline():
],
):
omniperf.main()
assert e.value.code == 1
assert e.value.code == 0

with pytest.raises(SystemExit) as e:
with patch(
"sys.argv",
[
"omniperf",
"analyze",
"--path",
"tests/workloads/multikernel/MI200",
"-k",
"0",
"--path",
"tests/workloads/multikernel/MI200",
"-k",
"1",
"--path",
"tests/workloads/multikernel/MI200",
"-k",
"2",
],
):
omniperf.main()
assert e.value.code == 0

with pytest.raises(SystemExit) as e:
with patch(
"sys.argv",
[
"omniperf",
"analyze",
"--path",
"tests/workloads/multikernel/MI200",
"-k",
"0",
"--path",
"tests/workloads/multikernel/MI200",
"-k",
"1",
"--path",
"tests/workloads/vcopy/MI100",
"-k",
"0",
],
):
omniperf.main()
assert e.value.code == 0


@pytest.mark.misc
Expand Down
8 changes: 8 additions & 0 deletions tests/workloads/multikernel/MI200/SQ_IFETCH_LEVEL.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
Dispatch_ID,Kernel_Name,GPU_ID,queue-id,queue-index,pid,tid,Grid_Size,Workgroup_Size,LDS_Per_Workgroup,Scratch_Per_Workitem,Arch_VGPR,Accum_VGPR,SGPR,wave_size,sig,obj,SQ_WAVES,SQ_IFETCH,SQ_IFETCH_LEVEL,SQ_ACCUM_PREV_HIRES,SQ_BUSY_CU_CYCLES,SQ_INSTS_VALU_CVT,SQ_INSTS_VMEM_WR,SQ_INSTS_VMEM_RD,TA_TA_BUSY_sum,TA_BUFFER_WAVEFRONTS_sum,TD_TD_BUSY_sum,TD_TC_STALL_sum,TCP_GATE_EN1_sum,TCP_GATE_EN2_sum,TCP_TD_TCP_STALL_CYCLES_sum,TCP_TCR_TCP_STALL_CYCLES_sum,TCC_CYCLE[0],TCC_RW_REQ[0],TCC_HIT[0],TCC_MISS[0],TCC_CYCLE[1],TCC_RW_REQ[1],TCC_HIT[1],TCC_MISS[1],TCC_CYCLE[2],TCC_RW_REQ[2],TCC_HIT[2],TCC_MISS[2],TCC_CYCLE[3],TCC_RW_REQ[3],TCC_HIT[3],TCC_MISS[3],TCC_CYCLE[4],TCC_RW_REQ[4],TCC_HIT[4],TCC_MISS[4],TCC_CYCLE[5],TCC_RW_REQ[5],TCC_HIT[5],TCC_MISS[5],TCC_CYCLE[6],TCC_RW_REQ[6],TCC_HIT[6],TCC_MISS[6],TCC_CYCLE[7],TCC_RW_REQ[7],TCC_HIT[7],TCC_MISS[7],TCC_CYCLE[8],TCC_RW_REQ[8],TCC_HIT[8],TCC_MISS[8],TCC_CYCLE[9],TCC_RW_REQ[9],TCC_HIT[9],TCC_MISS[9],TCC_CYCLE[10],TCC_RW_REQ[10],TCC_HIT[10],TCC_MISS[10],TCC_CYCLE[11],TCC_RW_REQ[11],TCC_HIT[11],TCC_MISS[11],TCC_CYCLE[12],TCC_RW_REQ[12],TCC_HIT[12],TCC_MISS[12],TCC_CYCLE[13],TCC_RW_REQ[13],TCC_HIT[13],TCC_MISS[13],TCC_CYCLE[14],TCC_RW_REQ[14],TCC_HIT[14],TCC_MISS[14],TCC_CYCLE[15],TCC_RW_REQ[15],TCC_HIT[15],TCC_MISS[15],TCC_CYCLE[16],TCC_RW_REQ[16],TCC_HIT[16],TCC_MISS[16],TCC_CYCLE[17],TCC_RW_REQ[17],TCC_HIT[17],TCC_MISS[17],TCC_CYCLE[18],TCC_RW_REQ[18],TCC_HIT[18],TCC_MISS[18],TCC_CYCLE[19],TCC_RW_REQ[19],TCC_HIT[19],TCC_MISS[19],TCC_CYCLE[20],TCC_RW_REQ[20],TCC_HIT[20],TCC_MISS[20],TCC_CYCLE[21],TCC_RW_REQ[21],TCC_HIT[21],TCC_MISS[21],TCC_CYCLE[22],TCC_RW_REQ[22],TCC_HIT[22],TCC_MISS[22],TCC_CYCLE[23],TCC_RW_REQ[23],TCC_HIT[23],TCC_MISS[23],TCC_CYCLE[24],TCC_RW_REQ[24],TCC_HIT[24],TCC_MISS[24],TCC_CYCLE[25],TCC_RW_REQ[25],TCC_HIT[25],TCC_MISS[25],TCC_CYCLE[26],TCC_RW_REQ[26],TCC_HIT[26],TCC_MISS[26],TCC_CYCLE[27],TCC_RW_REQ[27],TCC_HIT[27],TCC_MISS[27],TCC_CYCLE[28],TCC_RW_REQ[28],TCC_HIT[28],TCC_MISS[28],TCC_CYCLE[29],TCC_RW_REQ[29],TCC_HIT[29],TCC_MISS[29],TCC_CYCLE[30],TCC_RW_REQ[30],TCC_HIT[30],TCC_MISS[30],TCC_CYCLE[31],TCC_RW_REQ[31],TCC_HIT[31],TCC_MISS[31],CPC_CPC_STAT_BUSY,CPC_CPC_STAT_IDLE,CPF_CPF_STAT_BUSY,CPF_CPF_STAT_STALL,SPI_CSN_WINDOW_VALID,SPI_CSN_BUSY,GRBM_COUNT,GRBM_GUI_ACTIVE,DispatchNs,Start_Timestamp,End_Timestamp,CompleteNs
0,__amd_rocclr_fillBufferAligned.kd,2,0,0,789017,789017,256,256,0,0,12,4,32,64,0x0,0x7fc39b83d800,0,0,0,0,0,0,0,0,16384.0,0.0,0.0,16384.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,0,25626,16384,0,15492,15492,9586176074970,9595697581667,9595697588067,9586194080958
1,"global_write(int*, int) [clone .kd]",2,0,1,789017,789017,1,1,0,0,4,4,16,64,0x0,0x7fc39b805200,0,0,0,0,0,0,0,0,16384.0,0.0,0.0,16384.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,0,25626,16384,0,14214,14214,9586194431757,9595697981345,9595697983585,9586195534149
2,"generic_write(int*, int, int) [clone .kd]",2,0,2,789017,789017,1,1,4096,0,4,4,48,64,0x0,0x7fc39b805240,0,0,0,0,0,0,0,0,16384.0,0.0,0.0,16384.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,0,25626,16384,0,16792,16792,9586194965600,9595697989025,9595697991265,9586196424752
3,"global_read(int*, int) [clone .kd]",2,0,3,789017,789017,1,1,0,0,4,4,16,64,0x0,0x7fc39b805280,0,0,0,0,0,0,0,0,16384.0,0.0,0.0,16384.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,0,25626,16384,0,17066,17066,9586195545500,9595698033025,9595698035745,9586197503640
4,"generic_read(int*, int, int) [clone .kd]",2,0,4,789017,789017,1,1,0,0,4,4,16,64,0x0,0x7fc39b8052c0,0,0,0,0,0,0,0,0,16384.0,0.0,0.0,16384.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,0,25626,16384,0,16835,16835,9586196089011,9595698039905,9595698042305,9586198594409
5,"global_atomic(int*, int) [clone .kd]",2,0,5,789017,789017,1,1,0,0,4,4,16,64,0x0,0x7fc39b805300,0,0,0,0,0,0,0,0,16384.0,0.0,0.0,16384.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,0,25626,16384,0,16490,16490,9586196678469,9595698072385,9595698076225,9586199643230
6,"generic_atomic(int*, int, int) [clone .kd]",2,0,6,789017,789017,1,1,4096,0,4,4,48,64,0x0,0x7fc39b805340,0,0,0,0,0,0,0,0,16384.0,0.0,0.0,16384.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,0,25626,16384,0,17235,17235,9586197190792,9595698076225,9595698080065,9586200731796
Loading

0 comments on commit 13a06f5

Please sign in to comment.