From 13a06f51318c7862858ae90957263d7f1e93d035 Mon Sep 17 00:00:00 2001 From: zichguan-amd Date: Thu, 24 Oct 2024 13:00:36 -0400 Subject: [PATCH] Enable baseline comparison between workloads from the same path (#447) * Enable baseline comparison between workloads from the same path Signed-off-by: zichguan-amd * Formatting Signed-off-by: zichguan-amd * Change test to verify baseline works with same path Signed-off-by: zichguan-amd * Add three-way comparison test case Signed-off-by: zichguan-amd --------- Signed-off-by: zichguan-amd --- src/omniperf_analyze/analysis_base.py | 25 +- src/omniperf_analyze/analysis_cli.py | 22 +- src/omniperf_analyze/analysis_webui.py | 16 +- tests/test_analyze_commands.py | 50 +++- .../multikernel/MI200/SQ_IFETCH_LEVEL.csv | 8 + .../multikernel/MI200/SQ_INST_LEVEL_LDS.csv | 8 + .../multikernel/MI200/SQ_INST_LEVEL_SMEM.csv | 8 + .../multikernel/MI200/SQ_INST_LEVEL_VMEM.csv | 8 + .../multikernel/MI200/SQ_LEVEL_WAVES.csv | 8 + tests/workloads/multikernel/MI200/log.txt | 270 ++++++++++++++++++ .../MI200/perfmon/SQ_IFETCH_LEVEL.txt | 5 + .../MI200/perfmon/SQ_INST_LEVEL_LDS.txt | 5 + .../MI200/perfmon/SQ_INST_LEVEL_SMEM.txt | 5 + .../MI200/perfmon/SQ_INST_LEVEL_VMEM.txt | 5 + .../MI200/perfmon/SQ_LEVEL_WAVES.txt | 5 + .../multikernel/MI200/perfmon/pmc_perf_0.txt | 5 + .../multikernel/MI200/perfmon/pmc_perf_1.txt | 5 + .../multikernel/MI200/perfmon/pmc_perf_2.txt | 5 + .../multikernel/MI200/perfmon/pmc_perf_3.txt | 5 + .../multikernel/MI200/perfmon/pmc_perf_4.txt | 5 + .../multikernel/MI200/perfmon/pmc_perf_5.txt | 5 + .../multikernel/MI200/perfmon/pmc_perf_6.txt | 5 + .../multikernel/MI200/perfmon/pmc_perf_7.txt | 5 + .../multikernel/MI200/perfmon/pmc_perf_8.txt | 5 + .../multikernel/MI200/perfmon/pmc_perf_9.txt | 5 + .../multikernel/MI200/perfmon/timestamps.txt | 5 + .../multikernel/MI200/pmc_dispatch_info.csv | 8 + .../workloads/multikernel/MI200/pmc_perf.csv | 8 + .../workloads/multikernel/MI200/roofline.csv | 4 + tests/workloads/multikernel/MI200/sysinfo.csv | 2 + .../multikernel/MI200/timestamps.csv | 8 + 31 files changed, 492 insertions(+), 41 deletions(-) create mode 100644 tests/workloads/multikernel/MI200/SQ_IFETCH_LEVEL.csv create mode 100644 tests/workloads/multikernel/MI200/SQ_INST_LEVEL_LDS.csv create mode 100644 tests/workloads/multikernel/MI200/SQ_INST_LEVEL_SMEM.csv create mode 100644 tests/workloads/multikernel/MI200/SQ_INST_LEVEL_VMEM.csv create mode 100644 tests/workloads/multikernel/MI200/SQ_LEVEL_WAVES.csv create mode 100644 tests/workloads/multikernel/MI200/log.txt create mode 100644 tests/workloads/multikernel/MI200/perfmon/SQ_IFETCH_LEVEL.txt create mode 100644 tests/workloads/multikernel/MI200/perfmon/SQ_INST_LEVEL_LDS.txt create mode 100644 tests/workloads/multikernel/MI200/perfmon/SQ_INST_LEVEL_SMEM.txt create mode 100644 tests/workloads/multikernel/MI200/perfmon/SQ_INST_LEVEL_VMEM.txt create mode 100644 tests/workloads/multikernel/MI200/perfmon/SQ_LEVEL_WAVES.txt create mode 100644 tests/workloads/multikernel/MI200/perfmon/pmc_perf_0.txt create mode 100644 tests/workloads/multikernel/MI200/perfmon/pmc_perf_1.txt create mode 100644 tests/workloads/multikernel/MI200/perfmon/pmc_perf_2.txt create mode 100644 tests/workloads/multikernel/MI200/perfmon/pmc_perf_3.txt create mode 100644 tests/workloads/multikernel/MI200/perfmon/pmc_perf_4.txt create mode 100644 tests/workloads/multikernel/MI200/perfmon/pmc_perf_5.txt create mode 100644 tests/workloads/multikernel/MI200/perfmon/pmc_perf_6.txt create mode 100644 tests/workloads/multikernel/MI200/perfmon/pmc_perf_7.txt create mode 100644 tests/workloads/multikernel/MI200/perfmon/pmc_perf_8.txt create mode 100644 tests/workloads/multikernel/MI200/perfmon/pmc_perf_9.txt create mode 100644 tests/workloads/multikernel/MI200/perfmon/timestamps.txt create mode 100644 tests/workloads/multikernel/MI200/pmc_dispatch_info.csv create mode 100644 tests/workloads/multikernel/MI200/pmc_perf.csv create mode 100644 tests/workloads/multikernel/MI200/roofline.csv create mode 100644 tests/workloads/multikernel/MI200/sysinfo.csv create mode 100644 tests/workloads/multikernel/MI200/timestamps.csv diff --git a/src/omniperf_analyze/analysis_base.py b/src/omniperf_analyze/analysis_base.py index c80489972..eb0a67225 100644 --- a/src/omniperf_analyze/analysis_base.py +++ b/src/omniperf_analyze/analysis_base.py @@ -151,7 +151,7 @@ def initalize_runs(self, normalization_filter=None): self.load_options(normalization_filter) - for d in self.__args.path: + for i, d in enumerate(self.__args.path): w = schema.Workload() w.sys_info = file_io.load_sys_info(Path(d[0], "sysinfo.csv")) arch = w.sys_info.iloc[0]["gpu_arch"] @@ -161,7 +161,7 @@ def initalize_runs(self, normalization_filter=None): w.avail_ips = w.sys_info["ip_blocks"].item().split("|") w.dfs = copy.deepcopy(self._arch_configs[arch].dfs) w.dfs_type = self._arch_configs[arch].dfs_type - self._runs[d[0]] = w + self._runs[i] = w return self._runs @@ -184,15 +184,6 @@ def sanitize(self): # validate profiling data is_workload_empty(dir[0]) - # no using same paths - occurances = set() - for dir in self.__args.path: - dir = dir[0] - if dir in occurances: - console_error("You cannot provide the same path twice.") - else: - occurances.add(dir) - # ---------------------------------------------------- # Required methods to be implemented by child classes # ---------------------------------------------------- @@ -211,20 +202,20 @@ def pre_processing(self): # set filters if self.__args.gpu_kernel: - for d, gk in zip(self.__args.path, self.__args.gpu_kernel): - self._runs[d[0]].filter_kernel_ids = gk + for i, gk in enumerate(self.__args.gpu_kernel): + self._runs[i].filter_kernel_ids = gk if self.__args.gpu_id: if len(self.__args.gpu_id) == 1 and len(self.__args.path) != 1: for i in range(len(self.__args.path) - 1): self.__args.gpu_id.extend(self.__args.gpu_id) - for d, gi in zip(self.__args.path, self.__args.gpu_id): - self._runs[d[0]].filter_gpu_ids = gi + for i, gi in enumerate(self.__args.gpu_id): + self._runs[i].filter_gpu_ids = gi if self.__args.gpu_dispatch_id: if len(self.__args.gpu_dispatch_id) == 1 and len(self.__args.path) != 1: for i in range(len(self.__args.path) - 1): self.__args.gpu_dispatch_id.extend(self.__args.gpu_dispatch_id) - for d, gd in zip(self.__args.path, self.__args.gpu_dispatch_id): - self._runs[d[0]].filter_dispatch_ids = gd + for i, gd in enumerate(self.__args.gpu_dispatch_id): + self._runs[i].filter_dispatch_ids = gd @abstractmethod def run_analysis(self): diff --git a/src/omniperf_analyze/analysis_cli.py b/src/omniperf_analyze/analysis_cli.py index 36fbbf8c5..549653c62 100644 --- a/src/omniperf_analyze/analysis_cli.py +++ b/src/omniperf_analyze/analysis_cli.py @@ -38,27 +38,25 @@ def pre_processing(self): super().pre_processing() if self.get_args().random_port: console_error("--gui flag is required to enable --random-port") - for d in self.get_args().path: + for i, d in enumerate(self.get_args().path): file_io.create_df_kernel_top_stats( raw_data_dir=d[0], - filter_gpu_ids=self._runs[d[0]].filter_gpu_ids, - filter_dispatch_ids=self._runs[d[0]].filter_dispatch_ids, + filter_gpu_ids=self._runs[i].filter_gpu_ids, + filter_dispatch_ids=self._runs[i].filter_dispatch_ids, time_unit=self.get_args().time_unit, max_stat_num=self.get_args().max_stat_num, kernel_verbose=self.get_args().kernel_verbose, ) # create 'mega dataframe' - self._runs[d[0]].raw_pmc = file_io.create_df_pmc( + self._runs[i].raw_pmc = file_io.create_df_pmc( d[0], self.get_args().kernel_verbose, self.get_args().verbose ) # demangle and overwrite original 'Kernel_Name' - kernel_name_shortener( - self._runs[d[0]].raw_pmc, self.get_args().kernel_verbose - ) + kernel_name_shortener(self._runs[i].raw_pmc, self.get_args().kernel_verbose) # create the loaded table parser.load_table_data( - workload=self._runs[d[0]], + workload=self._runs[i], dir=d[0], is_gui=False, debug=self.get_args().debug, @@ -73,17 +71,13 @@ def run_analysis(self): tty.show_kernel_stats( self.get_args(), self._runs, - self._arch_configs[ - self._runs[self.get_args().path[0][0]].sys_info.iloc[0]["gpu_arch"] - ], + self._arch_configs[self._runs[0].sys_info.iloc[0]["gpu_arch"]], self._output, ) else: tty.show_all( self.get_args(), self._runs, - self._arch_configs[ - self._runs[self.get_args().path[0][0]].sys_info.iloc[0]["gpu_arch"] - ], + self._arch_configs[self._runs[0].sys_info.iloc[0]["gpu_arch"]], self._output, ) diff --git a/src/omniperf_analyze/analysis_webui.py b/src/omniperf_analyze/analysis_webui.py index 54645cbbb..bdf7ff782 100644 --- a/src/omniperf_analyze/analysis_webui.py +++ b/src/omniperf_analyze/analysis_webui.py @@ -274,20 +274,20 @@ def pre_processing(self): args = self.get_args() file_io.create_df_kernel_top_stats( raw_data_dir=self.dest_dir, - filter_gpu_ids=self._runs[self.dest_dir].filter_gpu_ids, - filter_dispatch_ids=self._runs[self.dest_dir].filter_dispatch_ids, + filter_gpu_ids=self._runs[0].filter_gpu_ids, + filter_dispatch_ids=self._runs[0].filter_dispatch_ids, time_unit=args.time_unit, max_stat_num=args.max_stat_num, kernel_verbose=self.get_args().kernel_verbose, ) # create 'mega dataframe' - self._runs[self.dest_dir].raw_pmc = file_io.create_df_pmc( + self._runs[0].raw_pmc = file_io.create_df_pmc( self.dest_dir, self.get_args().kernel_verbose, args.verbose ) # create the loaded kernel stats - parser.load_kernel_top(self._runs[self.dest_dir], self.dest_dir) + parser.load_kernel_top(self._runs[0], self.dest_dir) # set architecture - self.arch = self._runs[self.dest_dir].sys_info.iloc[0]["gpu_arch"] + self.arch = self._runs[0].sys_info.iloc[0]["gpu_arch"] else: console_error( @@ -300,9 +300,9 @@ def run_analysis(self): super().run_analysis() args = self.get_args() input_filters = { - "kernel": self._runs[self.dest_dir].filter_kernel_ids, - "gpu": self._runs[self.dest_dir].filter_gpu_ids, - "dispatch": self._runs[self.dest_dir].filter_dispatch_ids, + "kernel": self._runs[0].filter_kernel_ids, + "gpu": self._runs[0].filter_gpu_ids, + "dispatch": self._runs[0].filter_dispatch_ids, "normalization": args.normal_unit, "top_n": args.max_stat_num, } diff --git a/tests/test_analyze_commands.py b/tests/test_analyze_commands.py index 5a0e7aec8..303e60b0a 100644 --- a/tests/test_analyze_commands.py +++ b/tests/test_analyze_commands.py @@ -1136,7 +1136,7 @@ def test_baseline(): ], ): omniperf.main() - assert e.value.code == 1 + assert e.value.code == 0 with pytest.raises(SystemExit) as e: with patch( @@ -1151,7 +1151,53 @@ def test_baseline(): ], ): omniperf.main() - assert e.value.code == 1 + assert e.value.code == 0 + + with pytest.raises(SystemExit) as e: + with patch( + "sys.argv", + [ + "omniperf", + "analyze", + "--path", + "tests/workloads/multikernel/MI200", + "-k", + "0", + "--path", + "tests/workloads/multikernel/MI200", + "-k", + "1", + "--path", + "tests/workloads/multikernel/MI200", + "-k", + "2", + ], + ): + omniperf.main() + assert e.value.code == 0 + + with pytest.raises(SystemExit) as e: + with patch( + "sys.argv", + [ + "omniperf", + "analyze", + "--path", + "tests/workloads/multikernel/MI200", + "-k", + "0", + "--path", + "tests/workloads/multikernel/MI200", + "-k", + "1", + "--path", + "tests/workloads/vcopy/MI100", + "-k", + "0", + ], + ): + omniperf.main() + assert e.value.code == 0 @pytest.mark.misc diff --git a/tests/workloads/multikernel/MI200/SQ_IFETCH_LEVEL.csv b/tests/workloads/multikernel/MI200/SQ_IFETCH_LEVEL.csv new file mode 100644 index 000000000..cb500c756 --- /dev/null +++ b/tests/workloads/multikernel/MI200/SQ_IFETCH_LEVEL.csv @@ -0,0 +1,8 @@ +Dispatch_ID,Kernel_Name,GPU_ID,queue-id,queue-index,pid,tid,Grid_Size,Workgroup_Size,LDS_Per_Workgroup,Scratch_Per_Workitem,Arch_VGPR,Accum_VGPR,SGPR,wave_size,sig,obj,SQ_WAVES,SQ_IFETCH,SQ_IFETCH_LEVEL,SQ_ACCUM_PREV_HIRES,SQ_BUSY_CU_CYCLES,SQ_INSTS_VALU_CVT,SQ_INSTS_VMEM_WR,SQ_INSTS_VMEM_RD,TA_TA_BUSY_sum,TA_BUFFER_WAVEFRONTS_sum,TD_TD_BUSY_sum,TD_TC_STALL_sum,TCP_GATE_EN1_sum,TCP_GATE_EN2_sum,TCP_TD_TCP_STALL_CYCLES_sum,TCP_TCR_TCP_STALL_CYCLES_sum,TCC_CYCLE[0],TCC_RW_REQ[0],TCC_HIT[0],TCC_MISS[0],TCC_CYCLE[1],TCC_RW_REQ[1],TCC_HIT[1],TCC_MISS[1],TCC_CYCLE[2],TCC_RW_REQ[2],TCC_HIT[2],TCC_MISS[2],TCC_CYCLE[3],TCC_RW_REQ[3],TCC_HIT[3],TCC_MISS[3],TCC_CYCLE[4],TCC_RW_REQ[4],TCC_HIT[4],TCC_MISS[4],TCC_CYCLE[5],TCC_RW_REQ[5],TCC_HIT[5],TCC_MISS[5],TCC_CYCLE[6],TCC_RW_REQ[6],TCC_HIT[6],TCC_MISS[6],TCC_CYCLE[7],TCC_RW_REQ[7],TCC_HIT[7],TCC_MISS[7],TCC_CYCLE[8],TCC_RW_REQ[8],TCC_HIT[8],TCC_MISS[8],TCC_CYCLE[9],TCC_RW_REQ[9],TCC_HIT[9],TCC_MISS[9],TCC_CYCLE[10],TCC_RW_REQ[10],TCC_HIT[10],TCC_MISS[10],TCC_CYCLE[11],TCC_RW_REQ[11],TCC_HIT[11],TCC_MISS[11],TCC_CYCLE[12],TCC_RW_REQ[12],TCC_HIT[12],TCC_MISS[12],TCC_CYCLE[13],TCC_RW_REQ[13],TCC_HIT[13],TCC_MISS[13],TCC_CYCLE[14],TCC_RW_REQ[14],TCC_HIT[14],TCC_MISS[14],TCC_CYCLE[15],TCC_RW_REQ[15],TCC_HIT[15],TCC_MISS[15],TCC_CYCLE[16],TCC_RW_REQ[16],TCC_HIT[16],TCC_MISS[16],TCC_CYCLE[17],TCC_RW_REQ[17],TCC_HIT[17],TCC_MISS[17],TCC_CYCLE[18],TCC_RW_REQ[18],TCC_HIT[18],TCC_MISS[18],TCC_CYCLE[19],TCC_RW_REQ[19],TCC_HIT[19],TCC_MISS[19],TCC_CYCLE[20],TCC_RW_REQ[20],TCC_HIT[20],TCC_MISS[20],TCC_CYCLE[21],TCC_RW_REQ[21],TCC_HIT[21],TCC_MISS[21],TCC_CYCLE[22],TCC_RW_REQ[22],TCC_HIT[22],TCC_MISS[22],TCC_CYCLE[23],TCC_RW_REQ[23],TCC_HIT[23],TCC_MISS[23],TCC_CYCLE[24],TCC_RW_REQ[24],TCC_HIT[24],TCC_MISS[24],TCC_CYCLE[25],TCC_RW_REQ[25],TCC_HIT[25],TCC_MISS[25],TCC_CYCLE[26],TCC_RW_REQ[26],TCC_HIT[26],TCC_MISS[26],TCC_CYCLE[27],TCC_RW_REQ[27],TCC_HIT[27],TCC_MISS[27],TCC_CYCLE[28],TCC_RW_REQ[28],TCC_HIT[28],TCC_MISS[28],TCC_CYCLE[29],TCC_RW_REQ[29],TCC_HIT[29],TCC_MISS[29],TCC_CYCLE[30],TCC_RW_REQ[30],TCC_HIT[30],TCC_MISS[30],TCC_CYCLE[31],TCC_RW_REQ[31],TCC_HIT[31],TCC_MISS[31],CPC_CPC_STAT_BUSY,CPC_CPC_STAT_IDLE,CPF_CPF_STAT_BUSY,CPF_CPF_STAT_STALL,SPI_CSN_WINDOW_VALID,SPI_CSN_BUSY,GRBM_COUNT,GRBM_GUI_ACTIVE,DispatchNs,Start_Timestamp,End_Timestamp,CompleteNs +0,__amd_rocclr_fillBufferAligned.kd,2,0,0,789017,789017,256,256,0,0,12,4,32,64,0x0,0x7fc39b83d800,0,0,0,0,0,0,0,0,16384.0,0.0,0.0,16384.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,0,25626,16384,0,15492,15492,9586176074970,9595697581667,9595697588067,9586194080958 +1,"global_write(int*, int) [clone .kd]",2,0,1,789017,789017,1,1,0,0,4,4,16,64,0x0,0x7fc39b805200,0,0,0,0,0,0,0,0,16384.0,0.0,0.0,16384.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,0,25626,16384,0,14214,14214,9586194431757,9595697981345,9595697983585,9586195534149 +2,"generic_write(int*, int, int) [clone .kd]",2,0,2,789017,789017,1,1,4096,0,4,4,48,64,0x0,0x7fc39b805240,0,0,0,0,0,0,0,0,16384.0,0.0,0.0,16384.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,0,25626,16384,0,16792,16792,9586194965600,9595697989025,9595697991265,9586196424752 +3,"global_read(int*, int) [clone .kd]",2,0,3,789017,789017,1,1,0,0,4,4,16,64,0x0,0x7fc39b805280,0,0,0,0,0,0,0,0,16384.0,0.0,0.0,16384.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,0,25626,16384,0,17066,17066,9586195545500,9595698033025,9595698035745,9586197503640 +4,"generic_read(int*, int, int) [clone .kd]",2,0,4,789017,789017,1,1,0,0,4,4,16,64,0x0,0x7fc39b8052c0,0,0,0,0,0,0,0,0,16384.0,0.0,0.0,16384.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,0,25626,16384,0,16835,16835,9586196089011,9595698039905,9595698042305,9586198594409 +5,"global_atomic(int*, int) [clone .kd]",2,0,5,789017,789017,1,1,0,0,4,4,16,64,0x0,0x7fc39b805300,0,0,0,0,0,0,0,0,16384.0,0.0,0.0,16384.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,0,25626,16384,0,16490,16490,9586196678469,9595698072385,9595698076225,9586199643230 +6,"generic_atomic(int*, int, int) [clone .kd]",2,0,6,789017,789017,1,1,4096,0,4,4,48,64,0x0,0x7fc39b805340,0,0,0,0,0,0,0,0,16384.0,0.0,0.0,16384.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,0,25626,16384,0,17235,17235,9586197190792,9595698076225,9595698080065,9586200731796 diff --git a/tests/workloads/multikernel/MI200/SQ_INST_LEVEL_LDS.csv b/tests/workloads/multikernel/MI200/SQ_INST_LEVEL_LDS.csv new file mode 100644 index 000000000..1a975cb38 --- /dev/null +++ b/tests/workloads/multikernel/MI200/SQ_INST_LEVEL_LDS.csv @@ -0,0 +1,8 @@ +Dispatch_ID,Kernel_Name,GPU_ID,queue-id,queue-index,pid,tid,Grid_Size,Workgroup_Size,LDS_Per_Workgroup,Scratch_Per_Workitem,Arch_VGPR,Accum_VGPR,SGPR,wave_size,sig,obj,SQ_INSTS_LDS,SQ_INST_LEVEL_LDS,SQ_ACCUM_PREV_HIRES,SQ_INSTS_SALU,SQ_INSTS_VSKIPPED,SQ_INSTS,SQ_INSTS_VALU,SQ_INSTS_VALU_ADD_F16,TA_BUFFER_READ_WAVEFRONTS_sum,TA_BUFFER_WRITE_WAVEFRONTS_sum,TD_SPI_STALL_sum,TD_LOAD_WAVEFRONT_sum,TCP_READ_TAGCONFLICT_STALL_CYCLES_sum,TCP_WRITE_TAGCONFLICT_STALL_CYCLES_sum,TCP_ATOMIC_TAGCONFLICT_STALL_CYCLES_sum,TCP_TA_TCP_STATE_READ_sum,TCC_REQ[0],TCC_READ[0],TCC_WRITE[0],TCC_ATOMIC[0],TCC_REQ[1],TCC_READ[1],TCC_WRITE[1],TCC_ATOMIC[1],TCC_REQ[2],TCC_READ[2],TCC_WRITE[2],TCC_ATOMIC[2],TCC_REQ[3],TCC_READ[3],TCC_WRITE[3],TCC_ATOMIC[3],TCC_REQ[4],TCC_READ[4],TCC_WRITE[4],TCC_ATOMIC[4],TCC_REQ[5],TCC_READ[5],TCC_WRITE[5],TCC_ATOMIC[5],TCC_REQ[6],TCC_READ[6],TCC_WRITE[6],TCC_ATOMIC[6],TCC_REQ[7],TCC_READ[7],TCC_WRITE[7],TCC_ATOMIC[7],TCC_REQ[8],TCC_READ[8],TCC_WRITE[8],TCC_ATOMIC[8],TCC_REQ[9],TCC_READ[9],TCC_WRITE[9],TCC_ATOMIC[9],TCC_REQ[10],TCC_READ[10],TCC_WRITE[10],TCC_ATOMIC[10],TCC_REQ[11],TCC_READ[11],TCC_WRITE[11],TCC_ATOMIC[11],TCC_REQ[12],TCC_READ[12],TCC_WRITE[12],TCC_ATOMIC[12],TCC_REQ[13],TCC_READ[13],TCC_WRITE[13],TCC_ATOMIC[13],TCC_REQ[14],TCC_READ[14],TCC_WRITE[14],TCC_ATOMIC[14],TCC_REQ[15],TCC_READ[15],TCC_WRITE[15],TCC_ATOMIC[15],TCC_REQ[16],TCC_READ[16],TCC_WRITE[16],TCC_ATOMIC[16],TCC_REQ[17],TCC_READ[17],TCC_WRITE[17],TCC_ATOMIC[17],TCC_REQ[18],TCC_READ[18],TCC_WRITE[18],TCC_ATOMIC[18],TCC_REQ[19],TCC_READ[19],TCC_WRITE[19],TCC_ATOMIC[19],TCC_REQ[20],TCC_READ[20],TCC_WRITE[20],TCC_ATOMIC[20],TCC_REQ[21],TCC_READ[21],TCC_WRITE[21],TCC_ATOMIC[21],TCC_REQ[22],TCC_READ[22],TCC_WRITE[22],TCC_ATOMIC[22],TCC_REQ[23],TCC_READ[23],TCC_WRITE[23],TCC_ATOMIC[23],TCC_REQ[24],TCC_READ[24],TCC_WRITE[24],TCC_ATOMIC[24],TCC_REQ[25],TCC_READ[25],TCC_WRITE[25],TCC_ATOMIC[25],TCC_REQ[26],TCC_READ[26],TCC_WRITE[26],TCC_ATOMIC[26],TCC_REQ[27],TCC_READ[27],TCC_WRITE[27],TCC_ATOMIC[27],TCC_REQ[28],TCC_READ[28],TCC_WRITE[28],TCC_ATOMIC[28],TCC_REQ[29],TCC_READ[29],TCC_WRITE[29],TCC_ATOMIC[29],TCC_REQ[30],TCC_READ[30],TCC_WRITE[30],TCC_ATOMIC[30],TCC_REQ[31],TCC_READ[31],TCC_WRITE[31],TCC_ATOMIC[31],CPC_CPC_TCIU_BUSY,CPC_CPC_TCIU_IDLE,CPF_CPF_TCIU_BUSY,CPF_CPF_TCIU_STALL,SPI_CSN_NUM_THREADGROUPS,SPI_CSN_WAVE,GRBM_SPI_BUSY,DispatchNs,Start_Timestamp,End_Timestamp,CompleteNs +0,__amd_rocclr_fillBufferAligned.kd,2,0,0,789206,789206,256,256,0,0,12,4,32,64,0x0,0x7f092c135800,0,0,0,0,0,0,0,0,16384.0,0.0,0.0,16384.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,0,25626,16384,0,7832,9586813518158,9595697581667,9595697588067,9586831878942 +1,"global_write(int*, int) [clone .kd]",2,0,1,789206,789206,1,1,0,0,4,4,16,64,0x0,0x7f092c0f9200,0,0,0,0,0,0,0,0,16384.0,0.0,0.0,16384.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,0,25626,16384,0,4807,9586832277231,9595697981345,9595697983585,9586833234169 +2,"generic_write(int*, int, int) [clone .kd]",2,0,2,789206,789206,1,1,4096,0,4,4,48,64,0x0,0x7f092c0f9240,0,0,0,0,0,0,0,0,16384.0,0.0,0.0,16384.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,0,25626,16384,0,6147,9586832819029,9595697989025,9595697991265,9586834206386 +3,"global_read(int*, int) [clone .kd]",2,0,3,789206,789206,1,1,0,0,4,4,16,64,0x0,0x7f092c0f9280,0,0,0,0,0,0,0,0,16384.0,0.0,0.0,16384.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,0,25626,16384,0,6439,9586833482506,9595698033025,9595698035745,9586835298047 +4,"generic_read(int*, int, int) [clone .kd]",2,0,4,789206,789206,1,1,0,0,4,4,16,64,0x0,0x7f092c0f92c0,0,0,0,0,0,0,0,0,16384.0,0.0,0.0,16384.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,0,25626,16384,0,6821,9586834006330,9595698039905,9595698042305,9586836377766 +5,"global_atomic(int*, int) [clone .kd]",2,0,5,789206,789206,1,1,0,0,4,4,16,64,0x0,0x7f092c0f9300,0,0,0,0,0,0,0,0,16384.0,0.0,0.0,16384.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,0,25626,16384,0,6708,9586834769875,9595698072385,9595698076225,9586837446956 +6,"generic_atomic(int*, int, int) [clone .kd]",2,0,6,789206,789206,1,1,4096,0,4,4,48,64,0x0,0x7f092c0f9340,0,0,0,0,0,0,0,0,16384.0,0.0,0.0,16384.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,0,25626,16384,0,7838,9586835336510,9595698076225,9595698080065,9586838514602 diff --git a/tests/workloads/multikernel/MI200/SQ_INST_LEVEL_SMEM.csv b/tests/workloads/multikernel/MI200/SQ_INST_LEVEL_SMEM.csv new file mode 100644 index 000000000..8eae1c587 --- /dev/null +++ b/tests/workloads/multikernel/MI200/SQ_INST_LEVEL_SMEM.csv @@ -0,0 +1,8 @@ +Dispatch_ID,Kernel_Name,GPU_ID,queue-id,queue-index,pid,tid,Grid_Size,Workgroup_Size,LDS_Per_Workgroup,Scratch_Per_Workitem,Arch_VGPR,Accum_VGPR,SGPR,wave_size,sig,obj,SQ_INSTS_SMEM,SQ_INST_LEVEL_SMEM,SQ_ACCUM_PREV_HIRES,SQ_INSTS_VALU_MUL_F16,SQ_INSTS_VALU_FMA_F16,SQ_INSTS_VALU_TRANS_F16,SQ_INSTS_VALU_ADD_F32,SQ_INSTS_VALU_MUL_F32,TA_BUFFER_ATOMIC_WAVEFRONTS_sum,TA_BUFFER_TOTAL_CYCLES_sum,TD_ATOMIC_WAVEFRONT_sum,TD_STORE_WAVEFRONT_sum,TCP_VOLATILE_sum,TCP_TOTAL_ACCESSES_sum,TCP_TOTAL_READ_sum,TCP_TOTAL_WRITE_sum,TCC_EA_RDREQ[0],TCC_EA_RDREQ_32B[0],TCC_EA_WRREQ[0],TCC_EA_WRREQ_64B[0],TCC_EA_RDREQ[1],TCC_EA_RDREQ_32B[1],TCC_EA_WRREQ[1],TCC_EA_WRREQ_64B[1],TCC_EA_RDREQ[2],TCC_EA_RDREQ_32B[2],TCC_EA_WRREQ[2],TCC_EA_WRREQ_64B[2],TCC_EA_RDREQ[3],TCC_EA_RDREQ_32B[3],TCC_EA_WRREQ[3],TCC_EA_WRREQ_64B[3],TCC_EA_RDREQ[4],TCC_EA_RDREQ_32B[4],TCC_EA_WRREQ[4],TCC_EA_WRREQ_64B[4],TCC_EA_RDREQ[5],TCC_EA_RDREQ_32B[5],TCC_EA_WRREQ[5],TCC_EA_WRREQ_64B[5],TCC_EA_RDREQ[6],TCC_EA_RDREQ_32B[6],TCC_EA_WRREQ[6],TCC_EA_WRREQ_64B[6],TCC_EA_RDREQ[7],TCC_EA_RDREQ_32B[7],TCC_EA_WRREQ[7],TCC_EA_WRREQ_64B[7],TCC_EA_RDREQ[8],TCC_EA_RDREQ_32B[8],TCC_EA_WRREQ[8],TCC_EA_WRREQ_64B[8],TCC_EA_RDREQ[9],TCC_EA_RDREQ_32B[9],TCC_EA_WRREQ[9],TCC_EA_WRREQ_64B[9],TCC_EA_RDREQ[10],TCC_EA_RDREQ_32B[10],TCC_EA_WRREQ[10],TCC_EA_WRREQ_64B[10],TCC_EA_RDREQ[11],TCC_EA_RDREQ_32B[11],TCC_EA_WRREQ[11],TCC_EA_WRREQ_64B[11],TCC_EA_RDREQ[12],TCC_EA_RDREQ_32B[12],TCC_EA_WRREQ[12],TCC_EA_WRREQ_64B[12],TCC_EA_RDREQ[13],TCC_EA_RDREQ_32B[13],TCC_EA_WRREQ[13],TCC_EA_WRREQ_64B[13],TCC_EA_RDREQ[14],TCC_EA_RDREQ_32B[14],TCC_EA_WRREQ[14],TCC_EA_WRREQ_64B[14],TCC_EA_RDREQ[15],TCC_EA_RDREQ_32B[15],TCC_EA_WRREQ[15],TCC_EA_WRREQ_64B[15],TCC_EA_RDREQ[16],TCC_EA_RDREQ_32B[16],TCC_EA_WRREQ[16],TCC_EA_WRREQ_64B[16],TCC_EA_RDREQ[17],TCC_EA_RDREQ_32B[17],TCC_EA_WRREQ[17],TCC_EA_WRREQ_64B[17],TCC_EA_RDREQ[18],TCC_EA_RDREQ_32B[18],TCC_EA_WRREQ[18],TCC_EA_WRREQ_64B[18],TCC_EA_RDREQ[19],TCC_EA_RDREQ_32B[19],TCC_EA_WRREQ[19],TCC_EA_WRREQ_64B[19],TCC_EA_RDREQ[20],TCC_EA_RDREQ_32B[20],TCC_EA_WRREQ[20],TCC_EA_WRREQ_64B[20],TCC_EA_RDREQ[21],TCC_EA_RDREQ_32B[21],TCC_EA_WRREQ[21],TCC_EA_WRREQ_64B[21],TCC_EA_RDREQ[22],TCC_EA_RDREQ_32B[22],TCC_EA_WRREQ[22],TCC_EA_WRREQ_64B[22],TCC_EA_RDREQ[23],TCC_EA_RDREQ_32B[23],TCC_EA_WRREQ[23],TCC_EA_WRREQ_64B[23],TCC_EA_RDREQ[24],TCC_EA_RDREQ_32B[24],TCC_EA_WRREQ[24],TCC_EA_WRREQ_64B[24],TCC_EA_RDREQ[25],TCC_EA_RDREQ_32B[25],TCC_EA_WRREQ[25],TCC_EA_WRREQ_64B[25],TCC_EA_RDREQ[26],TCC_EA_RDREQ_32B[26],TCC_EA_WRREQ[26],TCC_EA_WRREQ_64B[26],TCC_EA_RDREQ[27],TCC_EA_RDREQ_32B[27],TCC_EA_WRREQ[27],TCC_EA_WRREQ_64B[27],TCC_EA_RDREQ[28],TCC_EA_RDREQ_32B[28],TCC_EA_WRREQ[28],TCC_EA_WRREQ_64B[28],TCC_EA_RDREQ[29],TCC_EA_RDREQ_32B[29],TCC_EA_WRREQ[29],TCC_EA_WRREQ_64B[29],TCC_EA_RDREQ[30],TCC_EA_RDREQ_32B[30],TCC_EA_WRREQ[30],TCC_EA_WRREQ_64B[30],TCC_EA_RDREQ[31],TCC_EA_RDREQ_32B[31],TCC_EA_WRREQ[31],TCC_EA_WRREQ_64B[31],CPC_CPC_STAT_STALL,CPC_UTCL1_STALL_ON_TRANSLATION,CPF_CPF_STAT_IDLE,CPF_CPF_TCIU_IDLE,SPI_RA_REQ_NO_ALLOC,SPI_RA_REQ_NO_ALLOC_CSN,DispatchNs,Start_Timestamp,End_Timestamp,CompleteNs +0,__amd_rocclr_fillBufferAligned.kd,2,0,0,789393,789393,256,256,0,0,12,4,32,64,0x0,0x7f96ea49d800,0,0,0,0,0,0,0,0,16384.0,0.0,0.0,16384.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,0,25626,16384,0,9587521927328,9595697581667,9595697588067,9587540188696 +1,"global_write(int*, int) [clone .kd]",2,0,1,789393,789393,1,1,0,0,4,4,16,64,0x0,0x7f96ea465200,0,0,0,0,0,0,0,0,16384.0,0.0,0.0,16384.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,0,25626,16384,0,9587540565083,9595697981345,9595697983585,9587541639562 +2,"generic_write(int*, int, int) [clone .kd]",2,0,2,789393,789393,1,1,4096,0,4,4,48,64,0x0,0x7f96ea465240,0,0,0,0,0,0,0,0,16384.0,0.0,0.0,16384.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,0,25626,16384,0,9587541115507,9595697989025,9595697991265,9587542609585 +3,"global_read(int*, int) [clone .kd]",2,0,3,789393,789393,1,1,0,0,4,4,16,64,0x0,0x7f96ea465280,0,0,0,0,0,0,0,0,16384.0,0.0,0.0,16384.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,0,25626,16384,0,9587541696489,9595698033025,9595698035745,9587543620214 +4,"generic_read(int*, int, int) [clone .kd]",2,0,4,789393,789393,1,1,0,0,4,4,16,64,0x0,0x7f96ea4652c0,0,0,0,0,0,0,0,0,16384.0,0.0,0.0,16384.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,0,25626,16384,0,9587542237877,9595698039905,9595698042305,9587544716895 +5,"global_atomic(int*, int) [clone .kd]",2,0,5,789393,789393,1,1,0,0,4,4,16,64,0x0,0x7f96ea465300,0,0,0,0,0,0,0,0,16384.0,0.0,0.0,16384.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,0,25626,16384,0,9587542873531,9595698072385,9595698076225,9587545756779 +6,"generic_atomic(int*, int, int) [clone .kd]",2,0,6,789393,789393,1,1,4096,0,4,4,48,64,0x0,0x7f96ea465340,0,0,0,0,0,0,0,0,16384.0,0.0,0.0,16384.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,0,25626,16384,0,9587543404819,9595698076225,9595698080065,9587546843111 diff --git a/tests/workloads/multikernel/MI200/SQ_INST_LEVEL_VMEM.csv b/tests/workloads/multikernel/MI200/SQ_INST_LEVEL_VMEM.csv new file mode 100644 index 000000000..c4c4af793 --- /dev/null +++ b/tests/workloads/multikernel/MI200/SQ_INST_LEVEL_VMEM.csv @@ -0,0 +1,8 @@ +Dispatch_ID,Kernel_Name,GPU_ID,queue-id,queue-index,pid,tid,Grid_Size,Workgroup_Size,LDS_Per_Workgroup,Scratch_Per_Workitem,Arch_VGPR,Accum_VGPR,SGPR,wave_size,sig,obj,SQ_INSTS_VMEM,SQ_INST_LEVEL_VMEM,SQ_ACCUM_PREV_HIRES,SQ_INSTS_VALU_FMA_F32,SQ_INSTS_VALU_TRANS_F32,SQ_INSTS_VALU_ADD_F64,SQ_INSTS_VALU_MUL_F64,SQ_INSTS_VALU_FMA_F64,TA_BUFFER_COALESCED_READ_CYCLES_sum,TA_BUFFER_COALESCED_WRITE_CYCLES_sum,TD_COALESCABLE_WAVEFRONT_sum,TCP_TOTAL_ATOMIC_WITH_RET_sum,TCP_TOTAL_ATOMIC_WITHOUT_RET_sum,TCP_TOTAL_WRITEBACK_INVALIDATES_sum,TCP_TOTAL_CACHE_ACCESSES_sum,TCC_EA_ATOMIC[0],TCC_EA_RDREQ_LEVEL[0],TCC_EA_WRREQ_LEVEL[0],TCC_EA_ATOMIC_LEVEL[0],TCC_EA_ATOMIC[1],TCC_EA_RDREQ_LEVEL[1],TCC_EA_WRREQ_LEVEL[1],TCC_EA_ATOMIC_LEVEL[1],TCC_EA_ATOMIC[2],TCC_EA_RDREQ_LEVEL[2],TCC_EA_WRREQ_LEVEL[2],TCC_EA_ATOMIC_LEVEL[2],TCC_EA_ATOMIC[3],TCC_EA_RDREQ_LEVEL[3],TCC_EA_WRREQ_LEVEL[3],TCC_EA_ATOMIC_LEVEL[3],TCC_EA_ATOMIC[4],TCC_EA_RDREQ_LEVEL[4],TCC_EA_WRREQ_LEVEL[4],TCC_EA_ATOMIC_LEVEL[4],TCC_EA_ATOMIC[5],TCC_EA_RDREQ_LEVEL[5],TCC_EA_WRREQ_LEVEL[5],TCC_EA_ATOMIC_LEVEL[5],TCC_EA_ATOMIC[6],TCC_EA_RDREQ_LEVEL[6],TCC_EA_WRREQ_LEVEL[6],TCC_EA_ATOMIC_LEVEL[6],TCC_EA_ATOMIC[7],TCC_EA_RDREQ_LEVEL[7],TCC_EA_WRREQ_LEVEL[7],TCC_EA_ATOMIC_LEVEL[7],TCC_EA_ATOMIC[8],TCC_EA_RDREQ_LEVEL[8],TCC_EA_WRREQ_LEVEL[8],TCC_EA_ATOMIC_LEVEL[8],TCC_EA_ATOMIC[9],TCC_EA_RDREQ_LEVEL[9],TCC_EA_WRREQ_LEVEL[9],TCC_EA_ATOMIC_LEVEL[9],TCC_EA_ATOMIC[10],TCC_EA_RDREQ_LEVEL[10],TCC_EA_WRREQ_LEVEL[10],TCC_EA_ATOMIC_LEVEL[10],TCC_EA_ATOMIC[11],TCC_EA_RDREQ_LEVEL[11],TCC_EA_WRREQ_LEVEL[11],TCC_EA_ATOMIC_LEVEL[11],TCC_EA_ATOMIC[12],TCC_EA_RDREQ_LEVEL[12],TCC_EA_WRREQ_LEVEL[12],TCC_EA_ATOMIC_LEVEL[12],TCC_EA_ATOMIC[13],TCC_EA_RDREQ_LEVEL[13],TCC_EA_WRREQ_LEVEL[13],TCC_EA_ATOMIC_LEVEL[13],TCC_EA_ATOMIC[14],TCC_EA_RDREQ_LEVEL[14],TCC_EA_WRREQ_LEVEL[14],TCC_EA_ATOMIC_LEVEL[14],TCC_EA_ATOMIC[15],TCC_EA_RDREQ_LEVEL[15],TCC_EA_WRREQ_LEVEL[15],TCC_EA_ATOMIC_LEVEL[15],TCC_EA_ATOMIC[16],TCC_EA_RDREQ_LEVEL[16],TCC_EA_WRREQ_LEVEL[16],TCC_EA_ATOMIC_LEVEL[16],TCC_EA_ATOMIC[17],TCC_EA_RDREQ_LEVEL[17],TCC_EA_WRREQ_LEVEL[17],TCC_EA_ATOMIC_LEVEL[17],TCC_EA_ATOMIC[18],TCC_EA_RDREQ_LEVEL[18],TCC_EA_WRREQ_LEVEL[18],TCC_EA_ATOMIC_LEVEL[18],TCC_EA_ATOMIC[19],TCC_EA_RDREQ_LEVEL[19],TCC_EA_WRREQ_LEVEL[19],TCC_EA_ATOMIC_LEVEL[19],TCC_EA_ATOMIC[20],TCC_EA_RDREQ_LEVEL[20],TCC_EA_WRREQ_LEVEL[20],TCC_EA_ATOMIC_LEVEL[20],TCC_EA_ATOMIC[21],TCC_EA_RDREQ_LEVEL[21],TCC_EA_WRREQ_LEVEL[21],TCC_EA_ATOMIC_LEVEL[21],TCC_EA_ATOMIC[22],TCC_EA_RDREQ_LEVEL[22],TCC_EA_WRREQ_LEVEL[22],TCC_EA_ATOMIC_LEVEL[22],TCC_EA_ATOMIC[23],TCC_EA_RDREQ_LEVEL[23],TCC_EA_WRREQ_LEVEL[23],TCC_EA_ATOMIC_LEVEL[23],TCC_EA_ATOMIC[24],TCC_EA_RDREQ_LEVEL[24],TCC_EA_WRREQ_LEVEL[24],TCC_EA_ATOMIC_LEVEL[24],TCC_EA_ATOMIC[25],TCC_EA_RDREQ_LEVEL[25],TCC_EA_WRREQ_LEVEL[25],TCC_EA_ATOMIC_LEVEL[25],TCC_EA_ATOMIC[26],TCC_EA_RDREQ_LEVEL[26],TCC_EA_WRREQ_LEVEL[26],TCC_EA_ATOMIC_LEVEL[26],TCC_EA_ATOMIC[27],TCC_EA_RDREQ_LEVEL[27],TCC_EA_WRREQ_LEVEL[27],TCC_EA_ATOMIC_LEVEL[27],TCC_EA_ATOMIC[28],TCC_EA_RDREQ_LEVEL[28],TCC_EA_WRREQ_LEVEL[28],TCC_EA_ATOMIC_LEVEL[28],TCC_EA_ATOMIC[29],TCC_EA_RDREQ_LEVEL[29],TCC_EA_WRREQ_LEVEL[29],TCC_EA_ATOMIC_LEVEL[29],TCC_EA_ATOMIC[30],TCC_EA_RDREQ_LEVEL[30],TCC_EA_WRREQ_LEVEL[30],TCC_EA_ATOMIC_LEVEL[30],TCC_EA_ATOMIC[31],TCC_EA_RDREQ_LEVEL[31],TCC_EA_WRREQ_LEVEL[31],TCC_EA_ATOMIC_LEVEL[31],CPC_CPC_UTCL2IU_BUSY,CPC_CPC_UTCL2IU_IDLE,CPF_CMP_UTCL1_STALL_ON_TRANSLATION,SPI_RA_RES_STALL_CSN,SPI_RA_TMP_STALL_CSN,DispatchNs,Start_Timestamp,End_Timestamp,CompleteNs +0,__amd_rocclr_fillBufferAligned.kd,2,0,0,789581,789581,256,256,0,0,12,4,32,64,0x0,0x7fa983c4d800,0,0,0,0,0,0,0,0,16384.0,0.0,0.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,0,16384,0,9588165999404,9595697581667,9595697588067,9588184873222 +1,"global_write(int*, int) [clone .kd]",2,0,1,789581,789581,1,1,0,0,4,4,16,64,0x0,0x7fa983c15200,0,0,0,0,0,0,0,0,16384.0,0.0,0.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,0,16384,0,9588185231295,9595697981345,9595697983585,9588186170961 +2,"generic_write(int*, int, int) [clone .kd]",2,0,2,789581,789581,1,1,4096,0,4,4,48,64,0x0,0x7fa983c15240,0,0,0,0,0,0,0,0,16384.0,0.0,0.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,0,16384,0,9588185747445,9595697989025,9595697991265,9588187051836 +3,"global_read(int*, int) [clone .kd]",2,0,3,789581,789581,1,1,0,0,4,4,16,64,0x0,0x7fa983c15280,0,0,0,0,0,0,0,0,16384.0,0.0,0.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,0,16384,0,9588186416192,9595698033025,9595698035745,9588187966405 +4,"generic_read(int*, int, int) [clone .kd]",2,0,4,789581,789581,1,1,0,0,4,4,16,64,0x0,0x7fa983c152c0,0,0,0,0,0,0,0,0,16384.0,0.0,0.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,0,16384,0,9588186956707,9595698039905,9595698042305,9588188690015 +5,"global_atomic(int*, int) [clone .kd]",2,0,5,789581,789581,1,1,0,0,4,4,16,64,0x0,0x7fa983c15300,0,0,0,0,0,0,0,0,16384.0,0.0,0.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,0,16384,0,9588187589306,9595698072385,9595698076225,9588189838613 +6,"generic_atomic(int*, int, int) [clone .kd]",2,0,6,789581,789581,1,1,4096,0,4,4,48,64,0x0,0x7fa983c15340,0,0,0,0,0,0,0,0,16384.0,0.0,0.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,0,16384,0,9588188228397,9595698076225,9595698080065,9588190792135 diff --git a/tests/workloads/multikernel/MI200/SQ_LEVEL_WAVES.csv b/tests/workloads/multikernel/MI200/SQ_LEVEL_WAVES.csv new file mode 100644 index 000000000..ba72ca851 --- /dev/null +++ b/tests/workloads/multikernel/MI200/SQ_LEVEL_WAVES.csv @@ -0,0 +1,8 @@ +Dispatch_ID,Kernel_Name,GPU_ID,queue-id,queue-index,pid,tid,Grid_Size,Workgroup_Size,LDS_Per_Workgroup,Scratch_Per_Workitem,Arch_VGPR,Accum_VGPR,SGPR,wave_size,sig,obj,SQ_CYCLES,SQ_WAVES,SQ_WAVE_CYCLES,SQ_BUSY_CYCLES,SQ_LEVEL_WAVES,SQ_ACCUM_PREV_HIRES,SQ_INSTS_VALU_TRANS_F64,SQ_INSTS_VALU_INT32,TA_ADDR_STALLED_BY_TC_CYCLES_sum,TA_TOTAL_WAVEFRONTS_sum,TCP_UTCL1_TRANSLATION_MISS_sum,TCP_UTCL1_TRANSLATION_HIT_sum,TCP_UTCL1_PERMISSION_MISS_sum,TCP_UTCL1_REQUEST_sum,TCC_EA_RDREQ_IO_CREDIT_STALL[0],TCC_EA_RDREQ_GMI_CREDIT_STALL[0],TCC_EA_RDREQ_DRAM_CREDIT_STALL[0],TCC_EA_WRREQ_IO_CREDIT_STALL[0],TCC_EA_RDREQ_IO_CREDIT_STALL[1],TCC_EA_RDREQ_GMI_CREDIT_STALL[1],TCC_EA_RDREQ_DRAM_CREDIT_STALL[1],TCC_EA_WRREQ_IO_CREDIT_STALL[1],TCC_EA_RDREQ_IO_CREDIT_STALL[2],TCC_EA_RDREQ_GMI_CREDIT_STALL[2],TCC_EA_RDREQ_DRAM_CREDIT_STALL[2],TCC_EA_WRREQ_IO_CREDIT_STALL[2],TCC_EA_RDREQ_IO_CREDIT_STALL[3],TCC_EA_RDREQ_GMI_CREDIT_STALL[3],TCC_EA_RDREQ_DRAM_CREDIT_STALL[3],TCC_EA_WRREQ_IO_CREDIT_STALL[3],TCC_EA_RDREQ_IO_CREDIT_STALL[4],TCC_EA_RDREQ_GMI_CREDIT_STALL[4],TCC_EA_RDREQ_DRAM_CREDIT_STALL[4],TCC_EA_WRREQ_IO_CREDIT_STALL[4],TCC_EA_RDREQ_IO_CREDIT_STALL[5],TCC_EA_RDREQ_GMI_CREDIT_STALL[5],TCC_EA_RDREQ_DRAM_CREDIT_STALL[5],TCC_EA_WRREQ_IO_CREDIT_STALL[5],TCC_EA_RDREQ_IO_CREDIT_STALL[6],TCC_EA_RDREQ_GMI_CREDIT_STALL[6],TCC_EA_RDREQ_DRAM_CREDIT_STALL[6],TCC_EA_WRREQ_IO_CREDIT_STALL[6],TCC_EA_RDREQ_IO_CREDIT_STALL[7],TCC_EA_RDREQ_GMI_CREDIT_STALL[7],TCC_EA_RDREQ_DRAM_CREDIT_STALL[7],TCC_EA_WRREQ_IO_CREDIT_STALL[7],TCC_EA_RDREQ_IO_CREDIT_STALL[8],TCC_EA_RDREQ_GMI_CREDIT_STALL[8],TCC_EA_RDREQ_DRAM_CREDIT_STALL[8],TCC_EA_WRREQ_IO_CREDIT_STALL[8],TCC_EA_RDREQ_IO_CREDIT_STALL[9],TCC_EA_RDREQ_GMI_CREDIT_STALL[9],TCC_EA_RDREQ_DRAM_CREDIT_STALL[9],TCC_EA_WRREQ_IO_CREDIT_STALL[9],TCC_EA_RDREQ_IO_CREDIT_STALL[10],TCC_EA_RDREQ_GMI_CREDIT_STALL[10],TCC_EA_RDREQ_DRAM_CREDIT_STALL[10],TCC_EA_WRREQ_IO_CREDIT_STALL[10],TCC_EA_RDREQ_IO_CREDIT_STALL[11],TCC_EA_RDREQ_GMI_CREDIT_STALL[11],TCC_EA_RDREQ_DRAM_CREDIT_STALL[11],TCC_EA_WRREQ_IO_CREDIT_STALL[11],TCC_EA_RDREQ_IO_CREDIT_STALL[12],TCC_EA_RDREQ_GMI_CREDIT_STALL[12],TCC_EA_RDREQ_DRAM_CREDIT_STALL[12],TCC_EA_WRREQ_IO_CREDIT_STALL[12],TCC_EA_RDREQ_IO_CREDIT_STALL[13],TCC_EA_RDREQ_GMI_CREDIT_STALL[13],TCC_EA_RDREQ_DRAM_CREDIT_STALL[13],TCC_EA_WRREQ_IO_CREDIT_STALL[13],TCC_EA_RDREQ_IO_CREDIT_STALL[14],TCC_EA_RDREQ_GMI_CREDIT_STALL[14],TCC_EA_RDREQ_DRAM_CREDIT_STALL[14],TCC_EA_WRREQ_IO_CREDIT_STALL[14],TCC_EA_RDREQ_IO_CREDIT_STALL[15],TCC_EA_RDREQ_GMI_CREDIT_STALL[15],TCC_EA_RDREQ_DRAM_CREDIT_STALL[15],TCC_EA_WRREQ_IO_CREDIT_STALL[15],TCC_EA_RDREQ_IO_CREDIT_STALL[16],TCC_EA_RDREQ_GMI_CREDIT_STALL[16],TCC_EA_RDREQ_DRAM_CREDIT_STALL[16],TCC_EA_WRREQ_IO_CREDIT_STALL[16],TCC_EA_RDREQ_IO_CREDIT_STALL[17],TCC_EA_RDREQ_GMI_CREDIT_STALL[17],TCC_EA_RDREQ_DRAM_CREDIT_STALL[17],TCC_EA_WRREQ_IO_CREDIT_STALL[17],TCC_EA_RDREQ_IO_CREDIT_STALL[18],TCC_EA_RDREQ_GMI_CREDIT_STALL[18],TCC_EA_RDREQ_DRAM_CREDIT_STALL[18],TCC_EA_WRREQ_IO_CREDIT_STALL[18],TCC_EA_RDREQ_IO_CREDIT_STALL[19],TCC_EA_RDREQ_GMI_CREDIT_STALL[19],TCC_EA_RDREQ_DRAM_CREDIT_STALL[19],TCC_EA_WRREQ_IO_CREDIT_STALL[19],TCC_EA_RDREQ_IO_CREDIT_STALL[20],TCC_EA_RDREQ_GMI_CREDIT_STALL[20],TCC_EA_RDREQ_DRAM_CREDIT_STALL[20],TCC_EA_WRREQ_IO_CREDIT_STALL[20],TCC_EA_RDREQ_IO_CREDIT_STALL[21],TCC_EA_RDREQ_GMI_CREDIT_STALL[21],TCC_EA_RDREQ_DRAM_CREDIT_STALL[21],TCC_EA_WRREQ_IO_CREDIT_STALL[21],TCC_EA_RDREQ_IO_CREDIT_STALL[22],TCC_EA_RDREQ_GMI_CREDIT_STALL[22],TCC_EA_RDREQ_DRAM_CREDIT_STALL[22],TCC_EA_WRREQ_IO_CREDIT_STALL[22],TCC_EA_RDREQ_IO_CREDIT_STALL[23],TCC_EA_RDREQ_GMI_CREDIT_STALL[23],TCC_EA_RDREQ_DRAM_CREDIT_STALL[23],TCC_EA_WRREQ_IO_CREDIT_STALL[23],TCC_EA_RDREQ_IO_CREDIT_STALL[24],TCC_EA_RDREQ_GMI_CREDIT_STALL[24],TCC_EA_RDREQ_DRAM_CREDIT_STALL[24],TCC_EA_WRREQ_IO_CREDIT_STALL[24],TCC_EA_RDREQ_IO_CREDIT_STALL[25],TCC_EA_RDREQ_GMI_CREDIT_STALL[25],TCC_EA_RDREQ_DRAM_CREDIT_STALL[25],TCC_EA_WRREQ_IO_CREDIT_STALL[25],TCC_EA_RDREQ_IO_CREDIT_STALL[26],TCC_EA_RDREQ_GMI_CREDIT_STALL[26],TCC_EA_RDREQ_DRAM_CREDIT_STALL[26],TCC_EA_WRREQ_IO_CREDIT_STALL[26],TCC_EA_RDREQ_IO_CREDIT_STALL[27],TCC_EA_RDREQ_GMI_CREDIT_STALL[27],TCC_EA_RDREQ_DRAM_CREDIT_STALL[27],TCC_EA_WRREQ_IO_CREDIT_STALL[27],TCC_EA_RDREQ_IO_CREDIT_STALL[28],TCC_EA_RDREQ_GMI_CREDIT_STALL[28],TCC_EA_RDREQ_DRAM_CREDIT_STALL[28],TCC_EA_WRREQ_IO_CREDIT_STALL[28],TCC_EA_RDREQ_IO_CREDIT_STALL[29],TCC_EA_RDREQ_GMI_CREDIT_STALL[29],TCC_EA_RDREQ_DRAM_CREDIT_STALL[29],TCC_EA_WRREQ_IO_CREDIT_STALL[29],TCC_EA_RDREQ_IO_CREDIT_STALL[30],TCC_EA_RDREQ_GMI_CREDIT_STALL[30],TCC_EA_RDREQ_DRAM_CREDIT_STALL[30],TCC_EA_WRREQ_IO_CREDIT_STALL[30],TCC_EA_RDREQ_IO_CREDIT_STALL[31],TCC_EA_RDREQ_GMI_CREDIT_STALL[31],TCC_EA_RDREQ_DRAM_CREDIT_STALL[31],TCC_EA_WRREQ_IO_CREDIT_STALL[31],CPC_ME1_BUSY_FOR_PACKET_DECODE,CPC_CPC_UTCL2IU_STALL,SPI_RA_WAVE_SIMD_FULL_CSN,SPI_RA_VGPR_SIMD_FULL_CSN,GRBM_COUNT,GRBM_GUI_ACTIVE,DispatchNs,Start_Timestamp,End_Timestamp,CompleteNs +0,__amd_rocclr_fillBufferAligned.kd,2,0,0,789767,789767,256,256,0,0,12,4,32,64,0x0,0x7f0b7a335800,0,0,0,0,0,0,0,0,16384.0,0.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,16384,0,15256,15256,9588800635588,9595697581667,9595697588067,9588818795995 +1,"global_write(int*, int) [clone .kd]",2,0,1,789767,789767,1,1,0,0,4,4,16,64,0x0,0x7f0b7a319200,0,0,0,0,0,0,0,0,16384.0,0.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,16384,0,14215,14215,9588819151083,9595697981345,9595697983585,9588820066433 +2,"generic_write(int*, int, int) [clone .kd]",2,0,2,789767,789767,1,1,4096,0,4,4,48,64,0x0,0x7f0b7a319240,0,0,0,0,0,0,0,0,16384.0,0.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,16384,0,16303,16303,9588819631105,9595697989025,9595697991265,9588820711946 +3,"global_read(int*, int) [clone .kd]",2,0,3,789767,789767,1,1,0,0,4,4,16,64,0x0,0x7f0b7a319280,0,0,0,0,0,0,0,0,16384.0,0.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,16384,0,17633,17633,9588820290945,9595698033025,9595698035745,9588821548468 +4,"generic_read(int*, int, int) [clone .kd]",2,0,4,789767,789767,1,1,0,0,4,4,16,64,0x0,0x7f0b7a3192c0,0,0,0,0,0,0,0,0,16384.0,0.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,16384,0,16725,16725,9588820914807,9595698039905,9595698042305,9588822390240 +5,"global_atomic(int*, int) [clone .kd]",2,0,5,789767,789767,1,1,0,0,4,4,16,64,0x0,0x7f0b7a319300,0,0,0,0,0,0,0,0,16384.0,0.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,16384,0,17452,17452,9588821374712,9595698072385,9595698076225,9588823186356 +6,"generic_atomic(int*, int, int) [clone .kd]",2,0,6,789767,789767,1,1,4096,0,4,4,48,64,0x0,0x7f0b7a319340,0,0,0,0,0,0,0,0,16384.0,0.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,16384,0,17418,17418,9588822047877,9595698076225,9595698080065,9588824015775 diff --git a/tests/workloads/multikernel/MI200/log.txt b/tests/workloads/multikernel/MI200/log.txt new file mode 100644 index 000000000..2b99bd0b4 --- /dev/null +++ b/tests/workloads/multikernel/MI200/log.txt @@ -0,0 +1,270 @@ +Omniperf version: 2.1.0 +Profiler choice: rocprofv1 +Path: /home/zichguan/omniperf_test/omniperf/tests/workloads/multikernel/MI210 +Target: MI200 +Command: ./sample/vmem +Kernel Selection: None +Dispatch Selection: None +Hardware Blocks: All + +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Collecting Performance Counters +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +[profiling] Current input file: tests/workloads/multikernel/MI210/perfmon/SQ_IFETCH_LEVEL.txt + |-> [rocprof] RPL: on '241022_154555' from '/opt/rocm-6.2.1' in '/home/zichguan/omniperf_test/omniperf' + |-> [rocprof] RPL: profiling '""./sample/vmem""' + |-> [rocprof] RPL: input file 'tests/workloads/multikernel/MI210/perfmon/SQ_IFETCH_LEVEL.txt' + |-> [rocprof] RPL: output dir '/tmp/rpl_data_241022_154555_788857' + |-> [rocprof] RPL: result dir '/tmp/rpl_data_241022_154555_788857/input0_results_241022_154555' + |-> [rocprof] ROCProfiler: input from "/tmp/rpl_data_241022_154555_788857/input0.xml" + |-> [rocprof] gpu_index = + |-> [rocprof] kernel = + |-> [rocprof] range = + |-> [rocprof] 152 metrics + |-> [rocprof] SQ_WAVES, SQ_IFETCH, SQ_IFETCH_LEVEL, SQ_ACCUM_PREV_HIRES, SQ_BUSY_CU_CYCLES, SQ_INSTS_VALU_CVT, SQ_INSTS_VMEM_WR, SQ_INSTS_VMEM_RD, TA_TA_BUSY_sum, TA_BUFFER_WAVEFRONTS_sum, TD_TD_BUSY_sum, TD_TC_STALL_sum, TCP_GATE_EN1_sum, TCP_GATE_EN2_sum, TCP_TD_TCP_STALL_CYCLES_sum, TCP_TCR_TCP_STALL_CYCLES_sum, TCC_CYCLE[0], TCC_RW_REQ[0], TCC_HIT[0], TCC_MISS[0], TCC_CYCLE[1], TCC_RW_REQ[1], TCC_HIT[1], TCC_MISS[1], TCC_CYCLE[2], TCC_RW_REQ[2], TCC_HIT[2], TCC_MISS[2], TCC_CYCLE[3], TCC_RW_REQ[3], TCC_HIT[3], TCC_MISS[3], TCC_CYCLE[4], TCC_RW_REQ[4], TCC_HIT[4], TCC_MISS[4], TCC_CYCLE[5], TCC_RW_REQ[5], TCC_HIT[5], TCC_MISS[5], TCC_CYCLE[6], TCC_RW_REQ[6], TCC_HIT[6], TCC_MISS[6], TCC_CYCLE[7], TCC_RW_REQ[7], TCC_HIT[7], TCC_MISS[7], TCC_CYCLE[8], TCC_RW_REQ[8], TCC_HIT[8], TCC_MISS[8], TCC_CYCLE[9], TCC_RW_REQ[9], TCC_HIT[9], TCC_MISS[9], TCC_CYCLE[10], TCC_RW_REQ[10], TCC_HIT[10], TCC_MISS[10], TCC_CYCLE[11], TCC_RW_REQ[11], TCC_HIT[11], TCC_MISS[11], TCC_CYCLE[12], TCC_RW_REQ[12], TCC_HIT[12], TCC_MISS[12], TCC_CYCLE[13], TCC_RW_REQ[13], TCC_HIT[13], TCC_MISS[13], TCC_CYCLE[14], TCC_RW_REQ[14], TCC_HIT[14], TCC_MISS[14], TCC_CYCLE[15], TCC_RW_REQ[15], TCC_HIT[15], TCC_MISS[15], TCC_CYCLE[16], TCC_RW_REQ[16], TCC_HIT[16], TCC_MISS[16], TCC_CYCLE[17], TCC_RW_REQ[17], TCC_HIT[17], TCC_MISS[17], TCC_CYCLE[18], TCC_RW_REQ[18], TCC_HIT[18], TCC_MISS[18], TCC_CYCLE[19], TCC_RW_REQ[19], TCC_HIT[19], TCC_MISS[19], TCC_CYCLE[20], TCC_RW_REQ[20], TCC_HIT[20], TCC_MISS[20], TCC_CYCLE[21], TCC_RW_REQ[21], TCC_HIT[21], TCC_MISS[21], TCC_CYCLE[22], TCC_RW_REQ[22], TCC_HIT[22], TCC_MISS[22], TCC_CYCLE[23], TCC_RW_REQ[23], TCC_HIT[23], TCC_MISS[23], TCC_CYCLE[24], TCC_RW_REQ[24], TCC_HIT[24], TCC_MISS[24], TCC_CYCLE[25], TCC_RW_REQ[25], TCC_HIT[25], TCC_MISS[25], TCC_CYCLE[26], TCC_RW_REQ[26], TCC_HIT[26], TCC_MISS[26], TCC_CYCLE[27], TCC_RW_REQ[27], TCC_HIT[27], TCC_MISS[27], TCC_CYCLE[28], TCC_RW_REQ[28], TCC_HIT[28], TCC_MISS[28], TCC_CYCLE[29], TCC_RW_REQ[29], TCC_HIT[29], TCC_MISS[29], TCC_CYCLE[30], TCC_RW_REQ[30], TCC_HIT[30], TCC_MISS[30], TCC_CYCLE[31], TCC_RW_REQ[31], TCC_HIT[31], TCC_MISS[31], CPC_CPC_STAT_BUSY, CPC_CPC_STAT_IDLE, CPF_CPF_STAT_BUSY, CPF_CPF_STAT_STALL, SPI_CSN_WINDOW_VALID, SPI_CSN_BUSY, GRBM_COUNT, GRBM_GUI_ACTIVE + |-> [rocprof] + |-> [rocprof] ROCPRofiler: 7 contexts collected, output directory /tmp/rpl_data_241022_154555_788857/input0_results_241022_154555 + |-> [rocprof] File 'tests/workloads/multikernel/MI210/SQ_IFETCH_LEVEL.csv' is generating + |-> [rocprof] +[profiling] Current input file: tests/workloads/multikernel/MI210/perfmon/SQ_INST_LEVEL_LDS.txt + |-> [rocprof] RPL: on '241022_154556' from '/opt/rocm-6.2.1' in '/home/zichguan/omniperf_test/omniperf' + |-> [rocprof] RPL: profiling '""./sample/vmem""' + |-> [rocprof] RPL: input file 'tests/workloads/multikernel/MI210/perfmon/SQ_INST_LEVEL_LDS.txt' + |-> [rocprof] RPL: output dir '/tmp/rpl_data_241022_154556_789045' + |-> [rocprof] RPL: result dir '/tmp/rpl_data_241022_154556_789045/input0_results_241022_154556' + |-> [rocprof] ROCProfiler: input from "/tmp/rpl_data_241022_154556_789045/input0.xml" + |-> [rocprof] gpu_index = + |-> [rocprof] kernel = + |-> [rocprof] range = + |-> [rocprof] 151 metrics + |-> [rocprof] SQ_INSTS_LDS, SQ_INST_LEVEL_LDS, SQ_ACCUM_PREV_HIRES, SQ_INSTS_SALU, SQ_INSTS_VSKIPPED, SQ_INSTS, SQ_INSTS_VALU, SQ_INSTS_VALU_ADD_F16, TA_BUFFER_READ_WAVEFRONTS_sum, TA_BUFFER_WRITE_WAVEFRONTS_sum, TD_SPI_STALL_sum, TD_LOAD_WAVEFRONT_sum, TCP_READ_TAGCONFLICT_STALL_CYCLES_sum, TCP_WRITE_TAGCONFLICT_STALL_CYCLES_sum, TCP_ATOMIC_TAGCONFLICT_STALL_CYCLES_sum, TCP_TA_TCP_STATE_READ_sum, TCC_REQ[0], TCC_READ[0], TCC_WRITE[0], TCC_ATOMIC[0], TCC_REQ[1], TCC_READ[1], TCC_WRITE[1], TCC_ATOMIC[1], TCC_REQ[2], TCC_READ[2], TCC_WRITE[2], TCC_ATOMIC[2], TCC_REQ[3], TCC_READ[3], TCC_WRITE[3], TCC_ATOMIC[3], TCC_REQ[4], TCC_READ[4], TCC_WRITE[4], TCC_ATOMIC[4], TCC_REQ[5], TCC_READ[5], TCC_WRITE[5], TCC_ATOMIC[5], TCC_REQ[6], TCC_READ[6], TCC_WRITE[6], TCC_ATOMIC[6], TCC_REQ[7], TCC_READ[7], TCC_WRITE[7], TCC_ATOMIC[7], TCC_REQ[8], TCC_READ[8], TCC_WRITE[8], TCC_ATOMIC[8], TCC_REQ[9], TCC_READ[9], TCC_WRITE[9], TCC_ATOMIC[9], TCC_REQ[10], TCC_READ[10], TCC_WRITE[10], TCC_ATOMIC[10], TCC_REQ[11], TCC_READ[11], TCC_WRITE[11], TCC_ATOMIC[11], TCC_REQ[12], TCC_READ[12], TCC_WRITE[12], TCC_ATOMIC[12], TCC_REQ[13], TCC_READ[13], TCC_WRITE[13], TCC_ATOMIC[13], TCC_REQ[14], TCC_READ[14], TCC_WRITE[14], TCC_ATOMIC[14], TCC_REQ[15], TCC_READ[15], TCC_WRITE[15], TCC_ATOMIC[15], TCC_REQ[16], TCC_READ[16], TCC_WRITE[16], TCC_ATOMIC[16], TCC_REQ[17], TCC_READ[17], TCC_WRITE[17], TCC_ATOMIC[17], TCC_REQ[18], TCC_READ[18], TCC_WRITE[18], TCC_ATOMIC[18], TCC_REQ[19], TCC_READ[19], TCC_WRITE[19], TCC_ATOMIC[19], TCC_REQ[20], TCC_READ[20], TCC_WRITE[20], TCC_ATOMIC[20], TCC_REQ[21], TCC_READ[21], TCC_WRITE[21], TCC_ATOMIC[21], TCC_REQ[22], TCC_READ[22], TCC_WRITE[22], TCC_ATOMIC[22], TCC_REQ[23], TCC_READ[23], TCC_WRITE[23], TCC_ATOMIC[23], TCC_REQ[24], TCC_READ[24], TCC_WRITE[24], TCC_ATOMIC[24], TCC_REQ[25], TCC_READ[25], TCC_WRITE[25], TCC_ATOMIC[25], TCC_REQ[26], TCC_READ[26], TCC_WRITE[26], TCC_ATOMIC[26], TCC_REQ[27], TCC_READ[27], TCC_WRITE[27], TCC_ATOMIC[27], TCC_REQ[28], TCC_READ[28], TCC_WRITE[28], TCC_ATOMIC[28], TCC_REQ[29], TCC_READ[29], TCC_WRITE[29], TCC_ATOMIC[29], TCC_REQ[30], TCC_READ[30], TCC_WRITE[30], TCC_ATOMIC[30], TCC_REQ[31], TCC_READ[31], TCC_WRITE[31], TCC_ATOMIC[31], CPC_CPC_TCIU_BUSY, CPC_CPC_TCIU_IDLE, CPF_CPF_TCIU_BUSY, CPF_CPF_TCIU_STALL, SPI_CSN_NUM_THREADGROUPS, SPI_CSN_WAVE, GRBM_SPI_BUSY + |-> [rocprof] + |-> [rocprof] ROCPRofiler: 7 contexts collected, output directory /tmp/rpl_data_241022_154556_789045/input0_results_241022_154556 + |-> [rocprof] File 'tests/workloads/multikernel/MI210/SQ_INST_LEVEL_LDS.csv' is generating + |-> [rocprof] +[profiling] Current input file: tests/workloads/multikernel/MI210/perfmon/SQ_INST_LEVEL_SMEM.txt + |-> [rocprof] RPL: on '241022_154556' from '/opt/rocm-6.2.1' in '/home/zichguan/omniperf_test/omniperf' + |-> [rocprof] RPL: profiling '""./sample/vmem""' + |-> [rocprof] RPL: input file 'tests/workloads/multikernel/MI210/perfmon/SQ_INST_LEVEL_SMEM.txt' + |-> [rocprof] RPL: output dir '/tmp/rpl_data_241022_154556_789233' + |-> [rocprof] RPL: result dir '/tmp/rpl_data_241022_154556_789233/input0_results_241022_154556' + |-> [rocprof] ROCProfiler: input from "/tmp/rpl_data_241022_154556_789233/input0.xml" + |-> [rocprof] gpu_index = + |-> [rocprof] kernel = + |-> [rocprof] range = + |-> [rocprof] 150 metrics + |-> [rocprof] SQ_INSTS_SMEM, SQ_INST_LEVEL_SMEM, SQ_ACCUM_PREV_HIRES, SQ_INSTS_VALU_MUL_F16, SQ_INSTS_VALU_FMA_F16, SQ_INSTS_VALU_TRANS_F16, SQ_INSTS_VALU_ADD_F32, SQ_INSTS_VALU_MUL_F32, TA_BUFFER_ATOMIC_WAVEFRONTS_sum, TA_BUFFER_TOTAL_CYCLES_sum, TD_ATOMIC_WAVEFRONT_sum, TD_STORE_WAVEFRONT_sum, TCP_VOLATILE_sum, TCP_TOTAL_ACCESSES_sum, TCP_TOTAL_READ_sum, TCP_TOTAL_WRITE_sum, TCC_EA_RDREQ[0], TCC_EA_RDREQ_32B[0], TCC_EA_WRREQ[0], TCC_EA_WRREQ_64B[0], TCC_EA_RDREQ[1], TCC_EA_RDREQ_32B[1], TCC_EA_WRREQ[1], TCC_EA_WRREQ_64B[1], TCC_EA_RDREQ[2], TCC_EA_RDREQ_32B[2], TCC_EA_WRREQ[2], TCC_EA_WRREQ_64B[2], TCC_EA_RDREQ[3], TCC_EA_RDREQ_32B[3], TCC_EA_WRREQ[3], TCC_EA_WRREQ_64B[3], TCC_EA_RDREQ[4], TCC_EA_RDREQ_32B[4], TCC_EA_WRREQ[4], TCC_EA_WRREQ_64B[4], TCC_EA_RDREQ[5], TCC_EA_RDREQ_32B[5], TCC_EA_WRREQ[5], TCC_EA_WRREQ_64B[5], TCC_EA_RDREQ[6], TCC_EA_RDREQ_32B[6], TCC_EA_WRREQ[6], TCC_EA_WRREQ_64B[6], TCC_EA_RDREQ[7], TCC_EA_RDREQ_32B[7], TCC_EA_WRREQ[7], TCC_EA_WRREQ_64B[7], TCC_EA_RDREQ[8], TCC_EA_RDREQ_32B[8], TCC_EA_WRREQ[8], TCC_EA_WRREQ_64B[8], TCC_EA_RDREQ[9], TCC_EA_RDREQ_32B[9], TCC_EA_WRREQ[9], TCC_EA_WRREQ_64B[9], TCC_EA_RDREQ[10], TCC_EA_RDREQ_32B[10], TCC_EA_WRREQ[10], TCC_EA_WRREQ_64B[10], TCC_EA_RDREQ[11], TCC_EA_RDREQ_32B[11], TCC_EA_WRREQ[11], TCC_EA_WRREQ_64B[11], TCC_EA_RDREQ[12], TCC_EA_RDREQ_32B[12], TCC_EA_WRREQ[12], TCC_EA_WRREQ_64B[12], TCC_EA_RDREQ[13], TCC_EA_RDREQ_32B[13], TCC_EA_WRREQ[13], TCC_EA_WRREQ_64B[13], TCC_EA_RDREQ[14], TCC_EA_RDREQ_32B[14], TCC_EA_WRREQ[14], TCC_EA_WRREQ_64B[14], TCC_EA_RDREQ[15], TCC_EA_RDREQ_32B[15], TCC_EA_WRREQ[15], TCC_EA_WRREQ_64B[15], TCC_EA_RDREQ[16], TCC_EA_RDREQ_32B[16], TCC_EA_WRREQ[16], TCC_EA_WRREQ_64B[16], TCC_EA_RDREQ[17], TCC_EA_RDREQ_32B[17], TCC_EA_WRREQ[17], TCC_EA_WRREQ_64B[17], TCC_EA_RDREQ[18], TCC_EA_RDREQ_32B[18], TCC_EA_WRREQ[18], TCC_EA_WRREQ_64B[18], TCC_EA_RDREQ[19], TCC_EA_RDREQ_32B[19], TCC_EA_WRREQ[19], TCC_EA_WRREQ_64B[19], TCC_EA_RDREQ[20], TCC_EA_RDREQ_32B[20], TCC_EA_WRREQ[20], TCC_EA_WRREQ_64B[20], TCC_EA_RDREQ[21], TCC_EA_RDREQ_32B[21], TCC_EA_WRREQ[21], TCC_EA_WRREQ_64B[21], TCC_EA_RDREQ[22], TCC_EA_RDREQ_32B[22], TCC_EA_WRREQ[22], TCC_EA_WRREQ_64B[22], TCC_EA_RDREQ[23], TCC_EA_RDREQ_32B[23], TCC_EA_WRREQ[23], TCC_EA_WRREQ_64B[23], TCC_EA_RDREQ[24], TCC_EA_RDREQ_32B[24], TCC_EA_WRREQ[24], TCC_EA_WRREQ_64B[24], TCC_EA_RDREQ[25], TCC_EA_RDREQ_32B[25], TCC_EA_WRREQ[25], TCC_EA_WRREQ_64B[25], TCC_EA_RDREQ[26], TCC_EA_RDREQ_32B[26], TCC_EA_WRREQ[26], TCC_EA_WRREQ_64B[26], TCC_EA_RDREQ[27], TCC_EA_RDREQ_32B[27], TCC_EA_WRREQ[27], TCC_EA_WRREQ_64B[27], TCC_EA_RDREQ[28], TCC_EA_RDREQ_32B[28], TCC_EA_WRREQ[28], TCC_EA_WRREQ_64B[28], TCC_EA_RDREQ[29], TCC_EA_RDREQ_32B[29], TCC_EA_WRREQ[29], TCC_EA_WRREQ_64B[29], TCC_EA_RDREQ[30], TCC_EA_RDREQ_32B[30], TCC_EA_WRREQ[30], TCC_EA_WRREQ_64B[30], TCC_EA_RDREQ[31], TCC_EA_RDREQ_32B[31], TCC_EA_WRREQ[31], TCC_EA_WRREQ_64B[31], CPC_CPC_STAT_STALL, CPC_UTCL1_STALL_ON_TRANSLATION, CPF_CPF_STAT_IDLE, CPF_CPF_TCIU_IDLE, SPI_RA_REQ_NO_ALLOC, SPI_RA_REQ_NO_ALLOC_CSN + |-> [rocprof] + |-> [rocprof] ROCPRofiler: 7 contexts collected, output directory /tmp/rpl_data_241022_154556_789233/input0_results_241022_154556 + |-> [rocprof] File 'tests/workloads/multikernel/MI210/SQ_INST_LEVEL_SMEM.csv' is generating + |-> [rocprof] +[profiling] Current input file: tests/workloads/multikernel/MI210/perfmon/SQ_INST_LEVEL_VMEM.txt + |-> [rocprof] RPL: on '241022_154557' from '/opt/rocm-6.2.1' in '/home/zichguan/omniperf_test/omniperf' + |-> [rocprof] RPL: profiling '""./sample/vmem""' + |-> [rocprof] RPL: input file 'tests/workloads/multikernel/MI210/perfmon/SQ_INST_LEVEL_VMEM.txt' + |-> [rocprof] RPL: output dir '/tmp/rpl_data_241022_154557_789421' + |-> [rocprof] RPL: result dir '/tmp/rpl_data_241022_154557_789421/input0_results_241022_154557' + |-> [rocprof] ROCProfiler: input from "/tmp/rpl_data_241022_154557_789421/input0.xml" + |-> [rocprof] gpu_index = + |-> [rocprof] kernel = + |-> [rocprof] range = + |-> [rocprof] 148 metrics + |-> [rocprof] SQ_INSTS_VMEM, SQ_INST_LEVEL_VMEM, SQ_ACCUM_PREV_HIRES, SQ_INSTS_VALU_FMA_F32, SQ_INSTS_VALU_TRANS_F32, SQ_INSTS_VALU_ADD_F64, SQ_INSTS_VALU_MUL_F64, SQ_INSTS_VALU_FMA_F64, TA_BUFFER_COALESCED_READ_CYCLES_sum, TA_BUFFER_COALESCED_WRITE_CYCLES_sum, TD_COALESCABLE_WAVEFRONT_sum, TCP_TOTAL_ATOMIC_WITH_RET_sum, TCP_TOTAL_ATOMIC_WITHOUT_RET_sum, TCP_TOTAL_WRITEBACK_INVALIDATES_sum, TCP_TOTAL_CACHE_ACCESSES_sum, TCC_EA_ATOMIC[0], TCC_EA_RDREQ_LEVEL[0], TCC_EA_WRREQ_LEVEL[0], TCC_EA_ATOMIC_LEVEL[0], TCC_EA_ATOMIC[1], TCC_EA_RDREQ_LEVEL[1], TCC_EA_WRREQ_LEVEL[1], TCC_EA_ATOMIC_LEVEL[1], TCC_EA_ATOMIC[2], TCC_EA_RDREQ_LEVEL[2], TCC_EA_WRREQ_LEVEL[2], TCC_EA_ATOMIC_LEVEL[2], TCC_EA_ATOMIC[3], TCC_EA_RDREQ_LEVEL[3], TCC_EA_WRREQ_LEVEL[3], TCC_EA_ATOMIC_LEVEL[3], TCC_EA_ATOMIC[4], TCC_EA_RDREQ_LEVEL[4], TCC_EA_WRREQ_LEVEL[4], TCC_EA_ATOMIC_LEVEL[4], TCC_EA_ATOMIC[5], TCC_EA_RDREQ_LEVEL[5], TCC_EA_WRREQ_LEVEL[5], TCC_EA_ATOMIC_LEVEL[5], TCC_EA_ATOMIC[6], TCC_EA_RDREQ_LEVEL[6], TCC_EA_WRREQ_LEVEL[6], TCC_EA_ATOMIC_LEVEL[6], TCC_EA_ATOMIC[7], TCC_EA_RDREQ_LEVEL[7], TCC_EA_WRREQ_LEVEL[7], TCC_EA_ATOMIC_LEVEL[7], TCC_EA_ATOMIC[8], TCC_EA_RDREQ_LEVEL[8], TCC_EA_WRREQ_LEVEL[8], TCC_EA_ATOMIC_LEVEL[8], TCC_EA_ATOMIC[9], TCC_EA_RDREQ_LEVEL[9], TCC_EA_WRREQ_LEVEL[9], TCC_EA_ATOMIC_LEVEL[9], TCC_EA_ATOMIC[10], TCC_EA_RDREQ_LEVEL[10], TCC_EA_WRREQ_LEVEL[10], TCC_EA_ATOMIC_LEVEL[10], TCC_EA_ATOMIC[11], TCC_EA_RDREQ_LEVEL[11], TCC_EA_WRREQ_LEVEL[11], TCC_EA_ATOMIC_LEVEL[11], TCC_EA_ATOMIC[12], TCC_EA_RDREQ_LEVEL[12], TCC_EA_WRREQ_LEVEL[12], TCC_EA_ATOMIC_LEVEL[12], TCC_EA_ATOMIC[13], TCC_EA_RDREQ_LEVEL[13], TCC_EA_WRREQ_LEVEL[13], TCC_EA_ATOMIC_LEVEL[13], TCC_EA_ATOMIC[14], TCC_EA_RDREQ_LEVEL[14], TCC_EA_WRREQ_LEVEL[14], TCC_EA_ATOMIC_LEVEL[14], TCC_EA_ATOMIC[15], TCC_EA_RDREQ_LEVEL[15], TCC_EA_WRREQ_LEVEL[15], TCC_EA_ATOMIC_LEVEL[15], TCC_EA_ATOMIC[16], TCC_EA_RDREQ_LEVEL[16], TCC_EA_WRREQ_LEVEL[16], TCC_EA_ATOMIC_LEVEL[16], TCC_EA_ATOMIC[17], TCC_EA_RDREQ_LEVEL[17], TCC_EA_WRREQ_LEVEL[17], TCC_EA_ATOMIC_LEVEL[17], TCC_EA_ATOMIC[18], TCC_EA_RDREQ_LEVEL[18], TCC_EA_WRREQ_LEVEL[18], TCC_EA_ATOMIC_LEVEL[18], TCC_EA_ATOMIC[19], TCC_EA_RDREQ_LEVEL[19], TCC_EA_WRREQ_LEVEL[19], TCC_EA_ATOMIC_LEVEL[19], TCC_EA_ATOMIC[20], TCC_EA_RDREQ_LEVEL[20], TCC_EA_WRREQ_LEVEL[20], TCC_EA_ATOMIC_LEVEL[20], TCC_EA_ATOMIC[21], TCC_EA_RDREQ_LEVEL[21], TCC_EA_WRREQ_LEVEL[21], TCC_EA_ATOMIC_LEVEL[21], TCC_EA_ATOMIC[22], TCC_EA_RDREQ_LEVEL[22], TCC_EA_WRREQ_LEVEL[22], TCC_EA_ATOMIC_LEVEL[22], TCC_EA_ATOMIC[23], TCC_EA_RDREQ_LEVEL[23], TCC_EA_WRREQ_LEVEL[23], TCC_EA_ATOMIC_LEVEL[23], TCC_EA_ATOMIC[24], TCC_EA_RDREQ_LEVEL[24], TCC_EA_WRREQ_LEVEL[24], TCC_EA_ATOMIC_LEVEL[24], TCC_EA_ATOMIC[25], TCC_EA_RDREQ_LEVEL[25], TCC_EA_WRREQ_LEVEL[25], TCC_EA_ATOMIC_LEVEL[25], TCC_EA_ATOMIC[26], TCC_EA_RDREQ_LEVEL[26], TCC_EA_WRREQ_LEVEL[26], TCC_EA_ATOMIC_LEVEL[26], TCC_EA_ATOMIC[27], TCC_EA_RDREQ_LEVEL[27], TCC_EA_WRREQ_LEVEL[27], TCC_EA_ATOMIC_LEVEL[27], TCC_EA_ATOMIC[28], TCC_EA_RDREQ_LEVEL[28], TCC_EA_WRREQ_LEVEL[28], TCC_EA_ATOMIC_LEVEL[28], TCC_EA_ATOMIC[29], TCC_EA_RDREQ_LEVEL[29], TCC_EA_WRREQ_LEVEL[29], TCC_EA_ATOMIC_LEVEL[29], TCC_EA_ATOMIC[30], TCC_EA_RDREQ_LEVEL[30], TCC_EA_WRREQ_LEVEL[30], TCC_EA_ATOMIC_LEVEL[30], TCC_EA_ATOMIC[31], TCC_EA_RDREQ_LEVEL[31], TCC_EA_WRREQ_LEVEL[31], TCC_EA_ATOMIC_LEVEL[31], CPC_CPC_UTCL2IU_BUSY, CPC_CPC_UTCL2IU_IDLE, CPF_CMP_UTCL1_STALL_ON_TRANSLATION, SPI_RA_RES_STALL_CSN, SPI_RA_TMP_STALL_CSN + |-> [rocprof] + |-> [rocprof] ROCPRofiler: 7 contexts collected, output directory /tmp/rpl_data_241022_154557_789421/input0_results_241022_154557 + |-> [rocprof] File 'tests/workloads/multikernel/MI210/SQ_INST_LEVEL_VMEM.csv' is generating + |-> [rocprof] +[profiling] Current input file: tests/workloads/multikernel/MI210/perfmon/SQ_LEVEL_WAVES.txt + |-> [rocprof] RPL: on '241022_154558' from '/opt/rocm-6.2.1' in '/home/zichguan/omniperf_test/omniperf' + |-> [rocprof] RPL: profiling '""./sample/vmem""' + |-> [rocprof] RPL: input file 'tests/workloads/multikernel/MI210/perfmon/SQ_LEVEL_WAVES.txt' + |-> [rocprof] RPL: output dir '/tmp/rpl_data_241022_154558_789607' + |-> [rocprof] RPL: result dir '/tmp/rpl_data_241022_154558_789607/input0_results_241022_154558' + |-> [rocprof] ROCProfiler: input from "/tmp/rpl_data_241022_154558_789607/input0.xml" + |-> [rocprof] gpu_index = + |-> [rocprof] kernel = + |-> [rocprof] range = + |-> [rocprof] 148 metrics + |-> [rocprof] SQ_CYCLES, SQ_WAVES, SQ_WAVE_CYCLES, SQ_BUSY_CYCLES, SQ_LEVEL_WAVES, SQ_ACCUM_PREV_HIRES, SQ_INSTS_VALU_TRANS_F64, SQ_INSTS_VALU_INT32, TA_ADDR_STALLED_BY_TC_CYCLES_sum, TA_TOTAL_WAVEFRONTS_sum, TCP_UTCL1_TRANSLATION_MISS_sum, TCP_UTCL1_TRANSLATION_HIT_sum, TCP_UTCL1_PERMISSION_MISS_sum, TCP_UTCL1_REQUEST_sum, TCC_EA_RDREQ_IO_CREDIT_STALL[0], TCC_EA_RDREQ_GMI_CREDIT_STALL[0], TCC_EA_RDREQ_DRAM_CREDIT_STALL[0], TCC_EA_WRREQ_IO_CREDIT_STALL[0], TCC_EA_RDREQ_IO_CREDIT_STALL[1], TCC_EA_RDREQ_GMI_CREDIT_STALL[1], TCC_EA_RDREQ_DRAM_CREDIT_STALL[1], TCC_EA_WRREQ_IO_CREDIT_STALL[1], TCC_EA_RDREQ_IO_CREDIT_STALL[2], TCC_EA_RDREQ_GMI_CREDIT_STALL[2], TCC_EA_RDREQ_DRAM_CREDIT_STALL[2], TCC_EA_WRREQ_IO_CREDIT_STALL[2], TCC_EA_RDREQ_IO_CREDIT_STALL[3], TCC_EA_RDREQ_GMI_CREDIT_STALL[3], TCC_EA_RDREQ_DRAM_CREDIT_STALL[3], TCC_EA_WRREQ_IO_CREDIT_STALL[3], TCC_EA_RDREQ_IO_CREDIT_STALL[4], TCC_EA_RDREQ_GMI_CREDIT_STALL[4], TCC_EA_RDREQ_DRAM_CREDIT_STALL[4], TCC_EA_WRREQ_IO_CREDIT_STALL[4], TCC_EA_RDREQ_IO_CREDIT_STALL[5], TCC_EA_RDREQ_GMI_CREDIT_STALL[5], TCC_EA_RDREQ_DRAM_CREDIT_STALL[5], TCC_EA_WRREQ_IO_CREDIT_STALL[5], TCC_EA_RDREQ_IO_CREDIT_STALL[6], TCC_EA_RDREQ_GMI_CREDIT_STALL[6], TCC_EA_RDREQ_DRAM_CREDIT_STALL[6], TCC_EA_WRREQ_IO_CREDIT_STALL[6], TCC_EA_RDREQ_IO_CREDIT_STALL[7], TCC_EA_RDREQ_GMI_CREDIT_STALL[7], TCC_EA_RDREQ_DRAM_CREDIT_STALL[7], TCC_EA_WRREQ_IO_CREDIT_STALL[7], TCC_EA_RDREQ_IO_CREDIT_STALL[8], TCC_EA_RDREQ_GMI_CREDIT_STALL[8], TCC_EA_RDREQ_DRAM_CREDIT_STALL[8], TCC_EA_WRREQ_IO_CREDIT_STALL[8], TCC_EA_RDREQ_IO_CREDIT_STALL[9], TCC_EA_RDREQ_GMI_CREDIT_STALL[9], TCC_EA_RDREQ_DRAM_CREDIT_STALL[9], TCC_EA_WRREQ_IO_CREDIT_STALL[9], TCC_EA_RDREQ_IO_CREDIT_STALL[10], TCC_EA_RDREQ_GMI_CREDIT_STALL[10], TCC_EA_RDREQ_DRAM_CREDIT_STALL[10], TCC_EA_WRREQ_IO_CREDIT_STALL[10], TCC_EA_RDREQ_IO_CREDIT_STALL[11], TCC_EA_RDREQ_GMI_CREDIT_STALL[11], TCC_EA_RDREQ_DRAM_CREDIT_STALL[11], TCC_EA_WRREQ_IO_CREDIT_STALL[11], TCC_EA_RDREQ_IO_CREDIT_STALL[12], TCC_EA_RDREQ_GMI_CREDIT_STALL[12], TCC_EA_RDREQ_DRAM_CREDIT_STALL[12], TCC_EA_WRREQ_IO_CREDIT_STALL[12], TCC_EA_RDREQ_IO_CREDIT_STALL[13], TCC_EA_RDREQ_GMI_CREDIT_STALL[13], TCC_EA_RDREQ_DRAM_CREDIT_STALL[13], TCC_EA_WRREQ_IO_CREDIT_STALL[13], TCC_EA_RDREQ_IO_CREDIT_STALL[14], TCC_EA_RDREQ_GMI_CREDIT_STALL[14], TCC_EA_RDREQ_DRAM_CREDIT_STALL[14], TCC_EA_WRREQ_IO_CREDIT_STALL[14], TCC_EA_RDREQ_IO_CREDIT_STALL[15], TCC_EA_RDREQ_GMI_CREDIT_STALL[15], TCC_EA_RDREQ_DRAM_CREDIT_STALL[15], TCC_EA_WRREQ_IO_CREDIT_STALL[15], TCC_EA_RDREQ_IO_CREDIT_STALL[16], TCC_EA_RDREQ_GMI_CREDIT_STALL[16], TCC_EA_RDREQ_DRAM_CREDIT_STALL[16], TCC_EA_WRREQ_IO_CREDIT_STALL[16], TCC_EA_RDREQ_IO_CREDIT_STALL[17], TCC_EA_RDREQ_GMI_CREDIT_STALL[17], TCC_EA_RDREQ_DRAM_CREDIT_STALL[17], TCC_EA_WRREQ_IO_CREDIT_STALL[17], TCC_EA_RDREQ_IO_CREDIT_STALL[18], TCC_EA_RDREQ_GMI_CREDIT_STALL[18], TCC_EA_RDREQ_DRAM_CREDIT_STALL[18], TCC_EA_WRREQ_IO_CREDIT_STALL[18], TCC_EA_RDREQ_IO_CREDIT_STALL[19], TCC_EA_RDREQ_GMI_CREDIT_STALL[19], TCC_EA_RDREQ_DRAM_CREDIT_STALL[19], TCC_EA_WRREQ_IO_CREDIT_STALL[19], TCC_EA_RDREQ_IO_CREDIT_STALL[20], TCC_EA_RDREQ_GMI_CREDIT_STALL[20], TCC_EA_RDREQ_DRAM_CREDIT_STALL[20], TCC_EA_WRREQ_IO_CREDIT_STALL[20], TCC_EA_RDREQ_IO_CREDIT_STALL[21], TCC_EA_RDREQ_GMI_CREDIT_STALL[21], TCC_EA_RDREQ_DRAM_CREDIT_STALL[21], TCC_EA_WRREQ_IO_CREDIT_STALL[21], TCC_EA_RDREQ_IO_CREDIT_STALL[22], TCC_EA_RDREQ_GMI_CREDIT_STALL[22], TCC_EA_RDREQ_DRAM_CREDIT_STALL[22], TCC_EA_WRREQ_IO_CREDIT_STALL[22], TCC_EA_RDREQ_IO_CREDIT_STALL[23], TCC_EA_RDREQ_GMI_CREDIT_STALL[23], TCC_EA_RDREQ_DRAM_CREDIT_STALL[23], TCC_EA_WRREQ_IO_CREDIT_STALL[23], TCC_EA_RDREQ_IO_CREDIT_STALL[24], TCC_EA_RDREQ_GMI_CREDIT_STALL[24], TCC_EA_RDREQ_DRAM_CREDIT_STALL[24], TCC_EA_WRREQ_IO_CREDIT_STALL[24], TCC_EA_RDREQ_IO_CREDIT_STALL[25], TCC_EA_RDREQ_GMI_CREDIT_STALL[25], TCC_EA_RDREQ_DRAM_CREDIT_STALL[25], TCC_EA_WRREQ_IO_CREDIT_STALL[25], TCC_EA_RDREQ_IO_CREDIT_STALL[26], TCC_EA_RDREQ_GMI_CREDIT_STALL[26], TCC_EA_RDREQ_DRAM_CREDIT_STALL[26], TCC_EA_WRREQ_IO_CREDIT_STALL[26], TCC_EA_RDREQ_IO_CREDIT_STALL[27], TCC_EA_RDREQ_GMI_CREDIT_STALL[27], TCC_EA_RDREQ_DRAM_CREDIT_STALL[27], TCC_EA_WRREQ_IO_CREDIT_STALL[27], TCC_EA_RDREQ_IO_CREDIT_STALL[28], TCC_EA_RDREQ_GMI_CREDIT_STALL[28], TCC_EA_RDREQ_DRAM_CREDIT_STALL[28], TCC_EA_WRREQ_IO_CREDIT_STALL[28], TCC_EA_RDREQ_IO_CREDIT_STALL[29], TCC_EA_RDREQ_GMI_CREDIT_STALL[29], TCC_EA_RDREQ_DRAM_CREDIT_STALL[29], TCC_EA_WRREQ_IO_CREDIT_STALL[29], TCC_EA_RDREQ_IO_CREDIT_STALL[30], TCC_EA_RDREQ_GMI_CREDIT_STALL[30], TCC_EA_RDREQ_DRAM_CREDIT_STALL[30], TCC_EA_WRREQ_IO_CREDIT_STALL[30], TCC_EA_RDREQ_IO_CREDIT_STALL[31], TCC_EA_RDREQ_GMI_CREDIT_STALL[31], TCC_EA_RDREQ_DRAM_CREDIT_STALL[31], TCC_EA_WRREQ_IO_CREDIT_STALL[31], CPC_ME1_BUSY_FOR_PACKET_DECODE, CPC_CPC_UTCL2IU_STALL, SPI_RA_WAVE_SIMD_FULL_CSN, SPI_RA_VGPR_SIMD_FULL_CSN, GRBM_COUNT, GRBM_GUI_ACTIVE + |-> [rocprof] + |-> [rocprof] ROCPRofiler: 7 contexts collected, output directory /tmp/rpl_data_241022_154558_789607/input0_results_241022_154558 + |-> [rocprof] File 'tests/workloads/multikernel/MI210/SQ_LEVEL_WAVES.csv' is generating + |-> [rocprof] +[profiling] Current input file: tests/workloads/multikernel/MI210/perfmon/pmc_perf_0.txt + |-> [rocprof] RPL: on '241022_154558' from '/opt/rocm-6.2.1' in '/home/zichguan/omniperf_test/omniperf' + |-> [rocprof] RPL: profiling '""./sample/vmem""' + |-> [rocprof] RPL: input file 'tests/workloads/multikernel/MI210/perfmon/pmc_perf_0.txt' + |-> [rocprof] RPL: output dir '/tmp/rpl_data_241022_154558_789792' + |-> [rocprof] RPL: result dir '/tmp/rpl_data_241022_154558_789792/input0_results_241022_154558' + |-> [rocprof] ROCProfiler: input from "/tmp/rpl_data_241022_154558_789792/input0.xml" + |-> [rocprof] gpu_index = + |-> [rocprof] kernel = + |-> [rocprof] range = + |-> [rocprof] 114 metrics + |-> [rocprof] SQ_INSTS_VALU_INT64, SQ_INSTS_FLAT, SQ_INSTS_GDS, SQ_INSTS_EXP_GDS, SQ_INSTS_BRANCH, SQ_INSTS_SENDMSG, SQ_WAIT_ANY, SQ_WAIT_INST_ANY, TA_ADDR_STALLED_BY_TD_CYCLES_sum, TA_DATA_STALLED_BY_TC_CYCLES_sum, TCP_TCP_LATENCY_sum, TCP_TCC_READ_REQ_LATENCY_sum, TCP_TCC_WRITE_REQ_LATENCY_sum, TCP_TCC_READ_REQ_sum, TCC_EA_WRREQ_GMI_CREDIT_STALL[0], TCC_EA_WRREQ_DRAM_CREDIT_STALL[0], TCC_TOO_MANY_EA_WRREQS_STALL[0], TCC_EA_WRREQ_GMI_CREDIT_STALL[1], TCC_EA_WRREQ_DRAM_CREDIT_STALL[1], TCC_TOO_MANY_EA_WRREQS_STALL[1], TCC_EA_WRREQ_GMI_CREDIT_STALL[2], TCC_EA_WRREQ_DRAM_CREDIT_STALL[2], TCC_TOO_MANY_EA_WRREQS_STALL[2], TCC_EA_WRREQ_GMI_CREDIT_STALL[3], TCC_EA_WRREQ_DRAM_CREDIT_STALL[3], TCC_TOO_MANY_EA_WRREQS_STALL[3], TCC_EA_WRREQ_GMI_CREDIT_STALL[4], TCC_EA_WRREQ_DRAM_CREDIT_STALL[4], TCC_TOO_MANY_EA_WRREQS_STALL[4], TCC_EA_WRREQ_GMI_CREDIT_STALL[5], TCC_EA_WRREQ_DRAM_CREDIT_STALL[5], TCC_TOO_MANY_EA_WRREQS_STALL[5], TCC_EA_WRREQ_GMI_CREDIT_STALL[6], TCC_EA_WRREQ_DRAM_CREDIT_STALL[6], TCC_TOO_MANY_EA_WRREQS_STALL[6], TCC_EA_WRREQ_GMI_CREDIT_STALL[7], TCC_EA_WRREQ_DRAM_CREDIT_STALL[7], TCC_TOO_MANY_EA_WRREQS_STALL[7], TCC_EA_WRREQ_GMI_CREDIT_STALL[8], TCC_EA_WRREQ_DRAM_CREDIT_STALL[8], TCC_TOO_MANY_EA_WRREQS_STALL[8], TCC_EA_WRREQ_GMI_CREDIT_STALL[9], TCC_EA_WRREQ_DRAM_CREDIT_STALL[9], TCC_TOO_MANY_EA_WRREQS_STALL[9], TCC_EA_WRREQ_GMI_CREDIT_STALL[10], TCC_EA_WRREQ_DRAM_CREDIT_STALL[10], TCC_TOO_MANY_EA_WRREQS_STALL[10], TCC_EA_WRREQ_GMI_CREDIT_STALL[11], TCC_EA_WRREQ_DRAM_CREDIT_STALL[11], TCC_TOO_MANY_EA_WRREQS_STALL[11], TCC_EA_WRREQ_GMI_CREDIT_STALL[12], TCC_EA_WRREQ_DRAM_CREDIT_STALL[12], TCC_TOO_MANY_EA_WRREQS_STALL[12], TCC_EA_WRREQ_GMI_CREDIT_STALL[13], TCC_EA_WRREQ_DRAM_CREDIT_STALL[13], TCC_TOO_MANY_EA_WRREQS_STALL[13], TCC_EA_WRREQ_GMI_CREDIT_STALL[14], TCC_EA_WRREQ_DRAM_CREDIT_STALL[14], TCC_TOO_MANY_EA_WRREQS_STALL[14], TCC_EA_WRREQ_GMI_CREDIT_STALL[15], TCC_EA_WRREQ_DRAM_CREDIT_STALL[15], TCC_TOO_MANY_EA_WRREQS_STALL[15], TCC_EA_WRREQ_GMI_CREDIT_STALL[16], TCC_EA_WRREQ_DRAM_CREDIT_STALL[16], TCC_TOO_MANY_EA_WRREQS_STALL[16], TCC_EA_WRREQ_GMI_CREDIT_STALL[17], TCC_EA_WRREQ_DRAM_CREDIT_STALL[17], TCC_TOO_MANY_EA_WRREQS_STALL[17], TCC_EA_WRREQ_GMI_CREDIT_STALL[18], TCC_EA_WRREQ_DRAM_CREDIT_STALL[18], TCC_TOO_MANY_EA_WRREQS_STALL[18], TCC_EA_WRREQ_GMI_CREDIT_STALL[19], TCC_EA_WRREQ_DRAM_CREDIT_STALL[19], TCC_TOO_MANY_EA_WRREQS_STALL[19], TCC_EA_WRREQ_GMI_CREDIT_STALL[20], TCC_EA_WRREQ_DRAM_CREDIT_STALL[20], TCC_TOO_MANY_EA_WRREQS_STALL[20], TCC_EA_WRREQ_GMI_CREDIT_STALL[21], TCC_EA_WRREQ_DRAM_CREDIT_STALL[21], TCC_TOO_MANY_EA_WRREQS_STALL[21], TCC_EA_WRREQ_GMI_CREDIT_STALL[22], TCC_EA_WRREQ_DRAM_CREDIT_STALL[22], TCC_TOO_MANY_EA_WRREQS_STALL[22], TCC_EA_WRREQ_GMI_CREDIT_STALL[23], TCC_EA_WRREQ_DRAM_CREDIT_STALL[23], TCC_TOO_MANY_EA_WRREQS_STALL[23], TCC_EA_WRREQ_GMI_CREDIT_STALL[24], TCC_EA_WRREQ_DRAM_CREDIT_STALL[24], TCC_TOO_MANY_EA_WRREQS_STALL[24], TCC_EA_WRREQ_GMI_CREDIT_STALL[25], TCC_EA_WRREQ_DRAM_CREDIT_STALL[25], TCC_TOO_MANY_EA_WRREQS_STALL[25], TCC_EA_WRREQ_GMI_CREDIT_STALL[26], TCC_EA_WRREQ_DRAM_CREDIT_STALL[26], TCC_TOO_MANY_EA_WRREQS_STALL[26], TCC_EA_WRREQ_GMI_CREDIT_STALL[27], TCC_EA_WRREQ_DRAM_CREDIT_STALL[27], TCC_TOO_MANY_EA_WRREQS_STALL[27], TCC_EA_WRREQ_GMI_CREDIT_STALL[28], TCC_EA_WRREQ_DRAM_CREDIT_STALL[28], TCC_TOO_MANY_EA_WRREQS_STALL[28], TCC_EA_WRREQ_GMI_CREDIT_STALL[29], TCC_EA_WRREQ_DRAM_CREDIT_STALL[29], TCC_TOO_MANY_EA_WRREQS_STALL[29], TCC_EA_WRREQ_GMI_CREDIT_STALL[30], TCC_EA_WRREQ_DRAM_CREDIT_STALL[30], TCC_TOO_MANY_EA_WRREQS_STALL[30], TCC_EA_WRREQ_GMI_CREDIT_STALL[31], TCC_EA_WRREQ_DRAM_CREDIT_STALL[31], TCC_TOO_MANY_EA_WRREQS_STALL[31], TCC_CYCLE_sum, CPC_ME1_DC0_SPI_BUSY, SPI_RA_SGPR_SIMD_FULL_CSN, SPI_RA_LDS_CU_FULL_CSN + |-> [rocprof] + |-> [rocprof] ROCPRofiler: 7 contexts collected, output directory /tmp/rpl_data_241022_154558_789792/input0_results_241022_154558 + |-> [rocprof] File 'tests/workloads/multikernel/MI210/pmc_perf_0.csv' is generating + |-> [rocprof] +[profiling] Current input file: tests/workloads/multikernel/MI210/perfmon/pmc_perf_1.txt + |-> [rocprof] RPL: on '241022_154559' from '/opt/rocm-6.2.1' in '/home/zichguan/omniperf_test/omniperf' + |-> [rocprof] RPL: profiling '""./sample/vmem""' + |-> [rocprof] RPL: input file 'tests/workloads/multikernel/MI210/perfmon/pmc_perf_1.txt' + |-> [rocprof] RPL: output dir '/tmp/rpl_data_241022_154559_789978' + |-> [rocprof] RPL: result dir '/tmp/rpl_data_241022_154559_789978/input0_results_241022_154559' + |-> [rocprof] ROCProfiler: input from "/tmp/rpl_data_241022_154559_789978/input0.xml" + |-> [rocprof] gpu_index = + |-> [rocprof] kernel = + |-> [rocprof] range = + |-> [rocprof] 20 metrics + |-> [rocprof] SQ_ACTIVE_INST_ANY, SQ_ACTIVE_INST_VMEM, SQ_ACTIVE_INST_LDS, SQ_ACTIVE_INST_VALU, SQ_ACTIVE_INST_SCA, SQ_ACTIVE_INST_EXP_GDS, SQ_ACTIVE_INST_MISC, SQ_ACTIVE_INST_FLAT, TA_FLAT_WAVEFRONTS_sum, TA_FLAT_READ_WAVEFRONTS_sum, TCP_TCC_WRITE_REQ_sum, TCP_TCC_ATOMIC_WITH_RET_REQ_sum, TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum, TCP_TCC_NC_READ_REQ_sum, TCC_BUSY_sum, TCC_PROBE_sum, TCC_PROBE_ALL_sum, TCC_NC_REQ_sum, SPI_RA_BAR_CU_FULL_CSN, SPI_RA_TGLIM_CU_FULL_CSN + |-> [rocprof] + |-> [rocprof] ROCPRofiler: 7 contexts collected, output directory /tmp/rpl_data_241022_154559_789978/input0_results_241022_154559 + |-> [rocprof] File 'tests/workloads/multikernel/MI210/pmc_perf_1.csv' is generating + |-> [rocprof] +[profiling] Current input file: tests/workloads/multikernel/MI210/perfmon/pmc_perf_2.txt + |-> [rocprof] RPL: on '241022_154600' from '/opt/rocm-6.2.1' in '/home/zichguan/omniperf_test/omniperf' + |-> [rocprof] RPL: profiling '""./sample/vmem""' + |-> [rocprof] RPL: input file 'tests/workloads/multikernel/MI210/perfmon/pmc_perf_2.txt' + |-> [rocprof] RPL: output dir '/tmp/rpl_data_241022_154600_790176' + |-> [rocprof] RPL: result dir '/tmp/rpl_data_241022_154600_790176/input0_results_241022_154600' + |-> [rocprof] ROCProfiler: input from "/tmp/rpl_data_241022_154600_790176/input0.xml" + |-> [rocprof] gpu_index = + |-> [rocprof] kernel = + |-> [rocprof] range = + |-> [rocprof] 20 metrics + |-> [rocprof] SQ_INST_CYCLES_VMEM_WR, SQ_INST_CYCLES_VMEM_RD, SQ_INST_CYCLES_SMEM, SQ_INST_CYCLES_SALU, SQ_THREAD_CYCLES_VALU, SQ_LDS_BANK_CONFLICT, SQ_LDS_ADDR_CONFLICT, SQ_LDS_UNALIGNED_STALL, TA_FLAT_WRITE_WAVEFRONTS_sum, TA_FLAT_ATOMIC_WAVEFRONTS_sum, TCP_TCC_NC_WRITE_REQ_sum, TCP_TCC_NC_ATOMIC_REQ_sum, TCP_TCC_UC_READ_REQ_sum, TCP_TCC_UC_WRITE_REQ_sum, TCC_UC_REQ_sum, TCC_CC_REQ_sum, TCC_RW_REQ_sum, TCC_REQ_sum, SPI_RA_WVLIM_STALL_CSN, SPI_SWC_CSC_WR + |-> [rocprof] + |-> [rocprof] ROCPRofiler: 7 contexts collected, output directory /tmp/rpl_data_241022_154600_790176/input0_results_241022_154600 + |-> [rocprof] File 'tests/workloads/multikernel/MI210/pmc_perf_2.csv' is generating + |-> [rocprof] +[profiling] Current input file: tests/workloads/multikernel/MI210/perfmon/pmc_perf_3.txt + |-> [rocprof] RPL: on '241022_154600' from '/opt/rocm-6.2.1' in '/home/zichguan/omniperf_test/omniperf' + |-> [rocprof] RPL: profiling '""./sample/vmem""' + |-> [rocprof] RPL: input file 'tests/workloads/multikernel/MI210/perfmon/pmc_perf_3.txt' + |-> [rocprof] RPL: output dir '/tmp/rpl_data_241022_154600_790361' + |-> [rocprof] RPL: result dir '/tmp/rpl_data_241022_154600_790361/input0_results_241022_154600' + |-> [rocprof] ROCProfiler: input from "/tmp/rpl_data_241022_154600_790361/input0.xml" + |-> [rocprof] gpu_index = + |-> [rocprof] kernel = + |-> [rocprof] range = + |-> [rocprof] 18 metrics + |-> [rocprof] SQ_WAVES_EQ_64, SQ_WAVES_LT_64, SQ_WAVES_LT_48, SQ_WAVES_LT_32, SQ_WAVES_LT_16, SQ_ITEMS, SQ_LDS_MEM_VIOLATIONS, SQ_LDS_ATOMIC_RETURN, TCP_TCC_UC_ATOMIC_REQ_sum, TCP_TCC_CC_READ_REQ_sum, TCP_TCC_CC_WRITE_REQ_sum, TCP_TCC_CC_ATOMIC_REQ_sum, TCC_STREAMING_REQ_sum, TCC_HIT_sum, TCC_MISS_sum, TCC_READ_sum, SPI_VWC_CSC_WR, SPI_RA_BULKY_CU_FULL_CSN + |-> [rocprof] + |-> [rocprof] ROCPRofiler: 7 contexts collected, output directory /tmp/rpl_data_241022_154600_790361/input0_results_241022_154600 + |-> [rocprof] File 'tests/workloads/multikernel/MI210/pmc_perf_3.csv' is generating + |-> [rocprof] +[profiling] Current input file: tests/workloads/multikernel/MI210/perfmon/pmc_perf_4.txt + |-> [rocprof] RPL: on '241022_154601' from '/opt/rocm-6.2.1' in '/home/zichguan/omniperf_test/omniperf' + |-> [rocprof] RPL: profiling '""./sample/vmem""' + |-> [rocprof] RPL: input file 'tests/workloads/multikernel/MI210/perfmon/pmc_perf_4.txt' + |-> [rocprof] RPL: output dir '/tmp/rpl_data_241022_154601_790548' + |-> [rocprof] RPL: result dir '/tmp/rpl_data_241022_154601_790548/input0_results_241022_154601' + |-> [rocprof] ROCProfiler: input from "/tmp/rpl_data_241022_154601_790548/input0.xml" + |-> [rocprof] gpu_index = + |-> [rocprof] kernel = + |-> [rocprof] range = + |-> [rocprof] 16 metrics + |-> [rocprof] SQ_LDS_IDX_ACTIVE, SQ_WAVES_RESTORED, SQ_WAVES_SAVED, SQ_INSTS_SMEM_NORM, SQ_INSTS_MFMA, SQ_INSTS_VALU_MFMA_I8, SQ_INSTS_VALU_MFMA_F16, SQ_INSTS_VALU_MFMA_BF16, TCP_TCC_RW_READ_REQ_sum, TCP_TCC_RW_WRITE_REQ_sum, TCP_TCC_RW_ATOMIC_REQ_sum, TCP_PENDING_STALL_CYCLES_sum, TCC_WRITE_sum, TCC_ATOMIC_sum, TCC_WRITEBACK_sum, TCC_EA_WRREQ_sum + |-> [rocprof] + |-> [rocprof] ROCPRofiler: 7 contexts collected, output directory /tmp/rpl_data_241022_154601_790548/input0_results_241022_154601 + |-> [rocprof] File 'tests/workloads/multikernel/MI210/pmc_perf_4.csv' is generating + |-> [rocprof] +[profiling] Current input file: tests/workloads/multikernel/MI210/perfmon/pmc_perf_5.txt + |-> [rocprof] RPL: on '241022_154602' from '/opt/rocm-6.2.1' in '/home/zichguan/omniperf_test/omniperf' + |-> [rocprof] RPL: profiling '""./sample/vmem""' + |-> [rocprof] RPL: input file 'tests/workloads/multikernel/MI210/perfmon/pmc_perf_5.txt' + |-> [rocprof] RPL: output dir '/tmp/rpl_data_241022_154602_790732' + |-> [rocprof] RPL: result dir '/tmp/rpl_data_241022_154602_790732/input0_results_241022_154602' + |-> [rocprof] ROCProfiler: input from "/tmp/rpl_data_241022_154602_790732/input0.xml" + |-> [rocprof] gpu_index = + |-> [rocprof] kernel = + |-> [rocprof] range = + |-> [rocprof] 12 metrics + |-> [rocprof] SQ_INSTS_VALU_MFMA_F32, SQ_INSTS_VALU_MFMA_F64, SQ_VALU_MFMA_BUSY_CYCLES, SQ_INSTS_FLAT_LDS_ONLY, SQ_INSTS_VALU_MFMA_MOPS_I8, SQ_INSTS_VALU_MFMA_MOPS_F16, SQ_INSTS_VALU_MFMA_MOPS_BF16, SQ_INSTS_VALU_MFMA_MOPS_F32, TCC_EA_WRREQ_64B_sum, TCC_EA_WR_UNCACHED_32B_sum, TCC_EA_WRREQ_DRAM_sum, TCC_EA_WRREQ_STALL_sum + |-> [rocprof] + |-> [rocprof] ROCPRofiler: 7 contexts collected, output directory /tmp/rpl_data_241022_154602_790732/input0_results_241022_154602 + |-> [rocprof] File 'tests/workloads/multikernel/MI210/pmc_perf_5.csv' is generating + |-> [rocprof] +[profiling] Current input file: tests/workloads/multikernel/MI210/perfmon/pmc_perf_6.txt + |-> [rocprof] RPL: on '241022_154602' from '/opt/rocm-6.2.1' in '/home/zichguan/omniperf_test/omniperf' + |-> [rocprof] RPL: profiling '""./sample/vmem""' + |-> [rocprof] RPL: input file 'tests/workloads/multikernel/MI210/perfmon/pmc_perf_6.txt' + |-> [rocprof] RPL: output dir '/tmp/rpl_data_241022_154602_790917' + |-> [rocprof] RPL: result dir '/tmp/rpl_data_241022_154602_790917/input0_results_241022_154602' + |-> [rocprof] ROCProfiler: input from "/tmp/rpl_data_241022_154602_790917/input0.xml" + |-> [rocprof] gpu_index = + |-> [rocprof] kernel = + |-> [rocprof] range = + |-> [rocprof] 12 metrics + |-> [rocprof] SQ_INSTS_VALU_MFMA_MOPS_F64, SQC_TC_INST_REQ, SQC_TC_DATA_READ_REQ, SQC_TC_DATA_WRITE_REQ, SQC_TC_DATA_ATOMIC_REQ, SQC_TC_STALL, SQC_TC_REQ, SQC_DCACHE_REQ_READ_16, TCC_EA_RDREQ_sum, TCC_EA_RDREQ_32B_sum, TCC_EA_RD_UNCACHED_32B_sum, TCC_EA_RDREQ_DRAM_sum + |-> [rocprof] + |-> [rocprof] ROCPRofiler: 7 contexts collected, output directory /tmp/rpl_data_241022_154602_790917/input0_results_241022_154602 + |-> [rocprof] File 'tests/workloads/multikernel/MI210/pmc_perf_6.csv' is generating + |-> [rocprof] +[profiling] Current input file: tests/workloads/multikernel/MI210/perfmon/pmc_perf_7.txt + |-> [rocprof] RPL: on '241022_154603' from '/opt/rocm-6.2.1' in '/home/zichguan/omniperf_test/omniperf' + |-> [rocprof] RPL: profiling '""./sample/vmem""' + |-> [rocprof] RPL: input file 'tests/workloads/multikernel/MI210/perfmon/pmc_perf_7.txt' + |-> [rocprof] RPL: output dir '/tmp/rpl_data_241022_154603_791101' + |-> [rocprof] RPL: result dir '/tmp/rpl_data_241022_154603_791101/input0_results_241022_154603' + |-> [rocprof] ROCProfiler: input from "/tmp/rpl_data_241022_154603_791101/input0.xml" + |-> [rocprof] gpu_index = + |-> [rocprof] kernel = + |-> [rocprof] range = + |-> [rocprof] 12 metrics + |-> [rocprof] SQC_ICACHE_REQ, SQC_ICACHE_HITS, SQC_ICACHE_MISSES, SQC_ICACHE_MISSES_DUPLICATE, SQC_DCACHE_INPUT_VALID_READYB, SQC_DCACHE_ATOMIC, SQC_DCACHE_REQ_READ_8, SQC_DCACHE_REQ, TCC_TAG_STALL_sum, TCC_NORMAL_WRITEBACK_sum, TCC_ALL_TC_OP_WB_WRITEBACK_sum, TCC_NORMAL_EVICT_sum + |-> [rocprof] + |-> [rocprof] ROCPRofiler: 7 contexts collected, output directory /tmp/rpl_data_241022_154603_791101/input0_results_241022_154603 + |-> [rocprof] File 'tests/workloads/multikernel/MI210/pmc_perf_7.csv' is generating + |-> [rocprof] +[profiling] Current input file: tests/workloads/multikernel/MI210/perfmon/pmc_perf_8.txt + |-> [rocprof] RPL: on '241022_154603' from '/opt/rocm-6.2.1' in '/home/zichguan/omniperf_test/omniperf' + |-> [rocprof] RPL: profiling '""./sample/vmem""' + |-> [rocprof] RPL: input file 'tests/workloads/multikernel/MI210/perfmon/pmc_perf_8.txt' + |-> [rocprof] RPL: output dir '/tmp/rpl_data_241022_154603_791285' + |-> [rocprof] RPL: result dir '/tmp/rpl_data_241022_154603_791285/input0_results_241022_154603' + |-> [rocprof] ROCProfiler: input from "/tmp/rpl_data_241022_154603_791285/input0.xml" + |-> [rocprof] gpu_index = + |-> [rocprof] kernel = + |-> [rocprof] range = + |-> [rocprof] 10 metrics + |-> [rocprof] SQC_DCACHE_HITS, SQC_DCACHE_MISSES, SQC_DCACHE_MISSES_DUPLICATE, SQC_DCACHE_REQ_READ_1, SQC_DCACHE_REQ_READ_2, SQC_DCACHE_REQ_READ_4, TCC_ALL_TC_OP_INV_EVICT_sum, TCC_TOO_MANY_EA_WRREQS_STALL_sum, TCC_EA_ATOMIC_sum, TCC_EA_RDREQ_LEVEL_sum + |-> [rocprof] + |-> [rocprof] ROCPRofiler: 7 contexts collected, output directory /tmp/rpl_data_241022_154603_791285/input0_results_241022_154603 + |-> [rocprof] File 'tests/workloads/multikernel/MI210/pmc_perf_8.csv' is generating + |-> [rocprof] +[profiling] Current input file: tests/workloads/multikernel/MI210/perfmon/pmc_perf_9.txt + |-> [rocprof] RPL: on '241022_154604' from '/opt/rocm-6.2.1' in '/home/zichguan/omniperf_test/omniperf' + |-> [rocprof] RPL: profiling '""./sample/vmem""' + |-> [rocprof] RPL: input file 'tests/workloads/multikernel/MI210/perfmon/pmc_perf_9.txt' + |-> [rocprof] RPL: output dir '/tmp/rpl_data_241022_154604_791469' + |-> [rocprof] RPL: result dir '/tmp/rpl_data_241022_154604_791469/input0_results_241022_154604' + |-> [rocprof] ROCProfiler: input from "/tmp/rpl_data_241022_154604_791469/input0.xml" + |-> [rocprof] gpu_index = + |-> [rocprof] kernel = + |-> [rocprof] range = + |-> [rocprof] 2 metrics + |-> [rocprof] TCC_EA_WRREQ_LEVEL_sum, TCC_EA_ATOMIC_LEVEL_sum + |-> [rocprof] + |-> [rocprof] ROCPRofiler: 7 contexts collected, output directory /tmp/rpl_data_241022_154604_791469/input0_results_241022_154604 + |-> [rocprof] File 'tests/workloads/multikernel/MI210/pmc_perf_9.csv' is generating + |-> [rocprof] +[profiling] Current input file: tests/workloads/multikernel/MI210/perfmon/timestamps.txt + |-> [rocprof] RPL: on '241022_154605' from '/opt/rocm-6.2.1' in '/home/zichguan/omniperf_test/omniperf' + |-> [rocprof] RPL: profiling '""./sample/vmem""' + |-> [rocprof] RPL: input file 'tests/workloads/multikernel/MI210/perfmon/timestamps.txt' + |-> [rocprof] RPL: output dir '/tmp/rpl_data_241022_154605_791667' + |-> [rocprof] RPL: result dir '/tmp/rpl_data_241022_154605_791667/input0_results_241022_154605' + |-> [rocprof] ROCProfiler: input from "/tmp/rpl_data_241022_154605_791667/input0.xml" + |-> [rocprof] gpu_index = + |-> [rocprof] kernel = + |-> [rocprof] range = + |-> [rocprof] 0 metrics + |-> [rocprof] + |-> [rocprof] ROCPRofiler: 7 contexts collected, output directory /tmp/rpl_data_241022_154605_791667/input0_results_241022_154605 + |-> [rocprof] File 'tests/workloads/multikernel/MI210/timestamps.csv' is generating + |-> [rocprof] +[roofline] Checking for roofline.csv in tests/workloads/multikernel/MI210 +[roofline] No roofline data found. Generating... diff --git a/tests/workloads/multikernel/MI200/perfmon/SQ_IFETCH_LEVEL.txt b/tests/workloads/multikernel/MI200/perfmon/SQ_IFETCH_LEVEL.txt new file mode 100644 index 000000000..bde9fa946 --- /dev/null +++ b/tests/workloads/multikernel/MI200/perfmon/SQ_IFETCH_LEVEL.txt @@ -0,0 +1,5 @@ +pmc: SQ_WAVES SQ_IFETCH SQ_IFETCH_LEVEL SQ_ACCUM_PREV_HIRES SQ_BUSY_CU_CYCLES SQ_INSTS_VALU_CVT SQ_INSTS_VMEM_WR SQ_INSTS_VMEM_RD TA_TA_BUSY_sum TA_BUFFER_WAVEFRONTS_sum TD_TD_BUSY_sum TD_TC_STALL_sum TCP_GATE_EN1_sum TCP_GATE_EN2_sum TCP_TD_TCP_STALL_CYCLES_sum TCP_TCR_TCP_STALL_CYCLES_sum TCC_CYCLE[0] TCC_RW_REQ[0] TCC_HIT[0] TCC_MISS[0] TCC_CYCLE[1] TCC_RW_REQ[1] TCC_HIT[1] TCC_MISS[1] TCC_CYCLE[2] TCC_RW_REQ[2] TCC_HIT[2] TCC_MISS[2] TCC_CYCLE[3] TCC_RW_REQ[3] TCC_HIT[3] TCC_MISS[3] TCC_CYCLE[4] TCC_RW_REQ[4] TCC_HIT[4] TCC_MISS[4] TCC_CYCLE[5] TCC_RW_REQ[5] TCC_HIT[5] TCC_MISS[5] TCC_CYCLE[6] TCC_RW_REQ[6] TCC_HIT[6] TCC_MISS[6] TCC_CYCLE[7] TCC_RW_REQ[7] TCC_HIT[7] TCC_MISS[7] TCC_CYCLE[8] TCC_RW_REQ[8] TCC_HIT[8] TCC_MISS[8] TCC_CYCLE[9] TCC_RW_REQ[9] TCC_HIT[9] TCC_MISS[9] TCC_CYCLE[10] TCC_RW_REQ[10] TCC_HIT[10] TCC_MISS[10] TCC_CYCLE[11] TCC_RW_REQ[11] TCC_HIT[11] TCC_MISS[11] TCC_CYCLE[12] TCC_RW_REQ[12] TCC_HIT[12] TCC_MISS[12] TCC_CYCLE[13] TCC_RW_REQ[13] TCC_HIT[13] TCC_MISS[13] TCC_CYCLE[14] TCC_RW_REQ[14] TCC_HIT[14] TCC_MISS[14] TCC_CYCLE[15] TCC_RW_REQ[15] TCC_HIT[15] TCC_MISS[15] TCC_CYCLE[16] TCC_RW_REQ[16] TCC_HIT[16] TCC_MISS[16] TCC_CYCLE[17] TCC_RW_REQ[17] TCC_HIT[17] TCC_MISS[17] TCC_CYCLE[18] TCC_RW_REQ[18] TCC_HIT[18] TCC_MISS[18] TCC_CYCLE[19] TCC_RW_REQ[19] TCC_HIT[19] TCC_MISS[19] TCC_CYCLE[20] TCC_RW_REQ[20] TCC_HIT[20] TCC_MISS[20] TCC_CYCLE[21] TCC_RW_REQ[21] TCC_HIT[21] TCC_MISS[21] TCC_CYCLE[22] TCC_RW_REQ[22] TCC_HIT[22] TCC_MISS[22] TCC_CYCLE[23] TCC_RW_REQ[23] TCC_HIT[23] TCC_MISS[23] TCC_CYCLE[24] TCC_RW_REQ[24] TCC_HIT[24] TCC_MISS[24] TCC_CYCLE[25] TCC_RW_REQ[25] TCC_HIT[25] TCC_MISS[25] TCC_CYCLE[26] TCC_RW_REQ[26] TCC_HIT[26] TCC_MISS[26] TCC_CYCLE[27] TCC_RW_REQ[27] TCC_HIT[27] TCC_MISS[27] TCC_CYCLE[28] TCC_RW_REQ[28] TCC_HIT[28] TCC_MISS[28] TCC_CYCLE[29] TCC_RW_REQ[29] TCC_HIT[29] TCC_MISS[29] TCC_CYCLE[30] TCC_RW_REQ[30] TCC_HIT[30] TCC_MISS[30] TCC_CYCLE[31] TCC_RW_REQ[31] TCC_HIT[31] TCC_MISS[31] CPC_CPC_STAT_BUSY CPC_CPC_STAT_IDLE CPF_CPF_STAT_BUSY CPF_CPF_STAT_STALL SPI_CSN_WINDOW_VALID SPI_CSN_BUSY GRBM_COUNT GRBM_GUI_ACTIVE + +gpu: +range: +kernel: diff --git a/tests/workloads/multikernel/MI200/perfmon/SQ_INST_LEVEL_LDS.txt b/tests/workloads/multikernel/MI200/perfmon/SQ_INST_LEVEL_LDS.txt new file mode 100644 index 000000000..487719a26 --- /dev/null +++ b/tests/workloads/multikernel/MI200/perfmon/SQ_INST_LEVEL_LDS.txt @@ -0,0 +1,5 @@ +pmc: SQ_INSTS_LDS SQ_INST_LEVEL_LDS SQ_ACCUM_PREV_HIRES SQ_INSTS_SALU SQ_INSTS_VSKIPPED SQ_INSTS SQ_INSTS_VALU SQ_INSTS_VALU_ADD_F16 TA_BUFFER_READ_WAVEFRONTS_sum TA_BUFFER_WRITE_WAVEFRONTS_sum TD_SPI_STALL_sum TD_LOAD_WAVEFRONT_sum TCP_READ_TAGCONFLICT_STALL_CYCLES_sum TCP_WRITE_TAGCONFLICT_STALL_CYCLES_sum TCP_ATOMIC_TAGCONFLICT_STALL_CYCLES_sum TCP_TA_TCP_STATE_READ_sum TCC_REQ[0] TCC_READ[0] TCC_WRITE[0] TCC_ATOMIC[0] TCC_REQ[1] TCC_READ[1] TCC_WRITE[1] TCC_ATOMIC[1] TCC_REQ[2] TCC_READ[2] TCC_WRITE[2] TCC_ATOMIC[2] TCC_REQ[3] TCC_READ[3] TCC_WRITE[3] TCC_ATOMIC[3] TCC_REQ[4] TCC_READ[4] TCC_WRITE[4] TCC_ATOMIC[4] TCC_REQ[5] TCC_READ[5] TCC_WRITE[5] TCC_ATOMIC[5] TCC_REQ[6] TCC_READ[6] TCC_WRITE[6] TCC_ATOMIC[6] TCC_REQ[7] TCC_READ[7] TCC_WRITE[7] TCC_ATOMIC[7] TCC_REQ[8] TCC_READ[8] TCC_WRITE[8] TCC_ATOMIC[8] TCC_REQ[9] TCC_READ[9] TCC_WRITE[9] TCC_ATOMIC[9] TCC_REQ[10] TCC_READ[10] TCC_WRITE[10] TCC_ATOMIC[10] TCC_REQ[11] TCC_READ[11] TCC_WRITE[11] TCC_ATOMIC[11] TCC_REQ[12] TCC_READ[12] TCC_WRITE[12] TCC_ATOMIC[12] TCC_REQ[13] TCC_READ[13] TCC_WRITE[13] TCC_ATOMIC[13] TCC_REQ[14] TCC_READ[14] TCC_WRITE[14] TCC_ATOMIC[14] TCC_REQ[15] TCC_READ[15] TCC_WRITE[15] TCC_ATOMIC[15] TCC_REQ[16] TCC_READ[16] TCC_WRITE[16] TCC_ATOMIC[16] TCC_REQ[17] TCC_READ[17] TCC_WRITE[17] TCC_ATOMIC[17] TCC_REQ[18] TCC_READ[18] TCC_WRITE[18] TCC_ATOMIC[18] TCC_REQ[19] TCC_READ[19] TCC_WRITE[19] TCC_ATOMIC[19] TCC_REQ[20] TCC_READ[20] TCC_WRITE[20] TCC_ATOMIC[20] TCC_REQ[21] TCC_READ[21] TCC_WRITE[21] TCC_ATOMIC[21] TCC_REQ[22] TCC_READ[22] TCC_WRITE[22] TCC_ATOMIC[22] TCC_REQ[23] TCC_READ[23] TCC_WRITE[23] TCC_ATOMIC[23] TCC_REQ[24] TCC_READ[24] TCC_WRITE[24] TCC_ATOMIC[24] TCC_REQ[25] TCC_READ[25] TCC_WRITE[25] TCC_ATOMIC[25] TCC_REQ[26] TCC_READ[26] TCC_WRITE[26] TCC_ATOMIC[26] TCC_REQ[27] TCC_READ[27] TCC_WRITE[27] TCC_ATOMIC[27] TCC_REQ[28] TCC_READ[28] TCC_WRITE[28] TCC_ATOMIC[28] TCC_REQ[29] TCC_READ[29] TCC_WRITE[29] TCC_ATOMIC[29] TCC_REQ[30] TCC_READ[30] TCC_WRITE[30] TCC_ATOMIC[30] TCC_REQ[31] TCC_READ[31] TCC_WRITE[31] TCC_ATOMIC[31] CPC_CPC_TCIU_BUSY CPC_CPC_TCIU_IDLE CPF_CPF_TCIU_BUSY CPF_CPF_TCIU_STALL SPI_CSN_NUM_THREADGROUPS SPI_CSN_WAVE GRBM_SPI_BUSY + +gpu: +range: +kernel: diff --git a/tests/workloads/multikernel/MI200/perfmon/SQ_INST_LEVEL_SMEM.txt b/tests/workloads/multikernel/MI200/perfmon/SQ_INST_LEVEL_SMEM.txt new file mode 100644 index 000000000..fff8e8f69 --- /dev/null +++ b/tests/workloads/multikernel/MI200/perfmon/SQ_INST_LEVEL_SMEM.txt @@ -0,0 +1,5 @@ +pmc: SQ_INSTS_SMEM SQ_INST_LEVEL_SMEM SQ_ACCUM_PREV_HIRES SQ_INSTS_VALU_MUL_F16 SQ_INSTS_VALU_FMA_F16 SQ_INSTS_VALU_TRANS_F16 SQ_INSTS_VALU_ADD_F32 SQ_INSTS_VALU_MUL_F32 TA_BUFFER_ATOMIC_WAVEFRONTS_sum TA_BUFFER_TOTAL_CYCLES_sum TD_ATOMIC_WAVEFRONT_sum TD_STORE_WAVEFRONT_sum TCP_VOLATILE_sum TCP_TOTAL_ACCESSES_sum TCP_TOTAL_READ_sum TCP_TOTAL_WRITE_sum TCC_EA_RDREQ[0] TCC_EA_RDREQ_32B[0] TCC_EA_WRREQ[0] TCC_EA_WRREQ_64B[0] TCC_EA_RDREQ[1] TCC_EA_RDREQ_32B[1] TCC_EA_WRREQ[1] TCC_EA_WRREQ_64B[1] TCC_EA_RDREQ[2] TCC_EA_RDREQ_32B[2] TCC_EA_WRREQ[2] TCC_EA_WRREQ_64B[2] TCC_EA_RDREQ[3] TCC_EA_RDREQ_32B[3] TCC_EA_WRREQ[3] TCC_EA_WRREQ_64B[3] TCC_EA_RDREQ[4] TCC_EA_RDREQ_32B[4] TCC_EA_WRREQ[4] TCC_EA_WRREQ_64B[4] TCC_EA_RDREQ[5] TCC_EA_RDREQ_32B[5] TCC_EA_WRREQ[5] TCC_EA_WRREQ_64B[5] TCC_EA_RDREQ[6] TCC_EA_RDREQ_32B[6] TCC_EA_WRREQ[6] TCC_EA_WRREQ_64B[6] TCC_EA_RDREQ[7] TCC_EA_RDREQ_32B[7] TCC_EA_WRREQ[7] TCC_EA_WRREQ_64B[7] TCC_EA_RDREQ[8] TCC_EA_RDREQ_32B[8] TCC_EA_WRREQ[8] TCC_EA_WRREQ_64B[8] TCC_EA_RDREQ[9] TCC_EA_RDREQ_32B[9] TCC_EA_WRREQ[9] TCC_EA_WRREQ_64B[9] TCC_EA_RDREQ[10] TCC_EA_RDREQ_32B[10] TCC_EA_WRREQ[10] TCC_EA_WRREQ_64B[10] TCC_EA_RDREQ[11] TCC_EA_RDREQ_32B[11] TCC_EA_WRREQ[11] TCC_EA_WRREQ_64B[11] TCC_EA_RDREQ[12] TCC_EA_RDREQ_32B[12] TCC_EA_WRREQ[12] TCC_EA_WRREQ_64B[12] TCC_EA_RDREQ[13] TCC_EA_RDREQ_32B[13] TCC_EA_WRREQ[13] TCC_EA_WRREQ_64B[13] TCC_EA_RDREQ[14] TCC_EA_RDREQ_32B[14] TCC_EA_WRREQ[14] TCC_EA_WRREQ_64B[14] TCC_EA_RDREQ[15] TCC_EA_RDREQ_32B[15] TCC_EA_WRREQ[15] TCC_EA_WRREQ_64B[15] TCC_EA_RDREQ[16] TCC_EA_RDREQ_32B[16] TCC_EA_WRREQ[16] TCC_EA_WRREQ_64B[16] TCC_EA_RDREQ[17] TCC_EA_RDREQ_32B[17] TCC_EA_WRREQ[17] TCC_EA_WRREQ_64B[17] TCC_EA_RDREQ[18] TCC_EA_RDREQ_32B[18] TCC_EA_WRREQ[18] TCC_EA_WRREQ_64B[18] TCC_EA_RDREQ[19] TCC_EA_RDREQ_32B[19] TCC_EA_WRREQ[19] TCC_EA_WRREQ_64B[19] TCC_EA_RDREQ[20] TCC_EA_RDREQ_32B[20] TCC_EA_WRREQ[20] TCC_EA_WRREQ_64B[20] TCC_EA_RDREQ[21] TCC_EA_RDREQ_32B[21] TCC_EA_WRREQ[21] TCC_EA_WRREQ_64B[21] TCC_EA_RDREQ[22] TCC_EA_RDREQ_32B[22] TCC_EA_WRREQ[22] TCC_EA_WRREQ_64B[22] TCC_EA_RDREQ[23] TCC_EA_RDREQ_32B[23] TCC_EA_WRREQ[23] TCC_EA_WRREQ_64B[23] TCC_EA_RDREQ[24] TCC_EA_RDREQ_32B[24] TCC_EA_WRREQ[24] TCC_EA_WRREQ_64B[24] TCC_EA_RDREQ[25] TCC_EA_RDREQ_32B[25] TCC_EA_WRREQ[25] TCC_EA_WRREQ_64B[25] TCC_EA_RDREQ[26] TCC_EA_RDREQ_32B[26] TCC_EA_WRREQ[26] TCC_EA_WRREQ_64B[26] TCC_EA_RDREQ[27] TCC_EA_RDREQ_32B[27] TCC_EA_WRREQ[27] TCC_EA_WRREQ_64B[27] TCC_EA_RDREQ[28] TCC_EA_RDREQ_32B[28] TCC_EA_WRREQ[28] TCC_EA_WRREQ_64B[28] TCC_EA_RDREQ[29] TCC_EA_RDREQ_32B[29] TCC_EA_WRREQ[29] TCC_EA_WRREQ_64B[29] TCC_EA_RDREQ[30] TCC_EA_RDREQ_32B[30] TCC_EA_WRREQ[30] TCC_EA_WRREQ_64B[30] TCC_EA_RDREQ[31] TCC_EA_RDREQ_32B[31] TCC_EA_WRREQ[31] TCC_EA_WRREQ_64B[31] CPC_CPC_STAT_STALL CPC_UTCL1_STALL_ON_TRANSLATION CPF_CPF_STAT_IDLE CPF_CPF_TCIU_IDLE SPI_RA_REQ_NO_ALLOC SPI_RA_REQ_NO_ALLOC_CSN + +gpu: +range: +kernel: diff --git a/tests/workloads/multikernel/MI200/perfmon/SQ_INST_LEVEL_VMEM.txt b/tests/workloads/multikernel/MI200/perfmon/SQ_INST_LEVEL_VMEM.txt new file mode 100644 index 000000000..9e8e85409 --- /dev/null +++ b/tests/workloads/multikernel/MI200/perfmon/SQ_INST_LEVEL_VMEM.txt @@ -0,0 +1,5 @@ +pmc: SQ_INSTS_VMEM SQ_INST_LEVEL_VMEM SQ_ACCUM_PREV_HIRES SQ_INSTS_VALU_FMA_F32 SQ_INSTS_VALU_TRANS_F32 SQ_INSTS_VALU_ADD_F64 SQ_INSTS_VALU_MUL_F64 SQ_INSTS_VALU_FMA_F64 TA_BUFFER_COALESCED_READ_CYCLES_sum TA_BUFFER_COALESCED_WRITE_CYCLES_sum TD_COALESCABLE_WAVEFRONT_sum TCP_TOTAL_ATOMIC_WITH_RET_sum TCP_TOTAL_ATOMIC_WITHOUT_RET_sum TCP_TOTAL_WRITEBACK_INVALIDATES_sum TCP_TOTAL_CACHE_ACCESSES_sum TCC_EA_ATOMIC[0] TCC_EA_RDREQ_LEVEL[0] TCC_EA_WRREQ_LEVEL[0] TCC_EA_ATOMIC_LEVEL[0] TCC_EA_ATOMIC[1] TCC_EA_RDREQ_LEVEL[1] TCC_EA_WRREQ_LEVEL[1] TCC_EA_ATOMIC_LEVEL[1] TCC_EA_ATOMIC[2] TCC_EA_RDREQ_LEVEL[2] TCC_EA_WRREQ_LEVEL[2] TCC_EA_ATOMIC_LEVEL[2] TCC_EA_ATOMIC[3] TCC_EA_RDREQ_LEVEL[3] TCC_EA_WRREQ_LEVEL[3] TCC_EA_ATOMIC_LEVEL[3] TCC_EA_ATOMIC[4] TCC_EA_RDREQ_LEVEL[4] TCC_EA_WRREQ_LEVEL[4] TCC_EA_ATOMIC_LEVEL[4] TCC_EA_ATOMIC[5] TCC_EA_RDREQ_LEVEL[5] TCC_EA_WRREQ_LEVEL[5] TCC_EA_ATOMIC_LEVEL[5] TCC_EA_ATOMIC[6] TCC_EA_RDREQ_LEVEL[6] TCC_EA_WRREQ_LEVEL[6] TCC_EA_ATOMIC_LEVEL[6] TCC_EA_ATOMIC[7] TCC_EA_RDREQ_LEVEL[7] TCC_EA_WRREQ_LEVEL[7] TCC_EA_ATOMIC_LEVEL[7] TCC_EA_ATOMIC[8] TCC_EA_RDREQ_LEVEL[8] TCC_EA_WRREQ_LEVEL[8] TCC_EA_ATOMIC_LEVEL[8] TCC_EA_ATOMIC[9] TCC_EA_RDREQ_LEVEL[9] TCC_EA_WRREQ_LEVEL[9] TCC_EA_ATOMIC_LEVEL[9] TCC_EA_ATOMIC[10] TCC_EA_RDREQ_LEVEL[10] TCC_EA_WRREQ_LEVEL[10] TCC_EA_ATOMIC_LEVEL[10] TCC_EA_ATOMIC[11] TCC_EA_RDREQ_LEVEL[11] TCC_EA_WRREQ_LEVEL[11] TCC_EA_ATOMIC_LEVEL[11] TCC_EA_ATOMIC[12] TCC_EA_RDREQ_LEVEL[12] TCC_EA_WRREQ_LEVEL[12] TCC_EA_ATOMIC_LEVEL[12] TCC_EA_ATOMIC[13] TCC_EA_RDREQ_LEVEL[13] TCC_EA_WRREQ_LEVEL[13] TCC_EA_ATOMIC_LEVEL[13] TCC_EA_ATOMIC[14] TCC_EA_RDREQ_LEVEL[14] TCC_EA_WRREQ_LEVEL[14] TCC_EA_ATOMIC_LEVEL[14] TCC_EA_ATOMIC[15] TCC_EA_RDREQ_LEVEL[15] TCC_EA_WRREQ_LEVEL[15] TCC_EA_ATOMIC_LEVEL[15] TCC_EA_ATOMIC[16] TCC_EA_RDREQ_LEVEL[16] TCC_EA_WRREQ_LEVEL[16] TCC_EA_ATOMIC_LEVEL[16] TCC_EA_ATOMIC[17] TCC_EA_RDREQ_LEVEL[17] TCC_EA_WRREQ_LEVEL[17] TCC_EA_ATOMIC_LEVEL[17] TCC_EA_ATOMIC[18] TCC_EA_RDREQ_LEVEL[18] TCC_EA_WRREQ_LEVEL[18] TCC_EA_ATOMIC_LEVEL[18] TCC_EA_ATOMIC[19] TCC_EA_RDREQ_LEVEL[19] TCC_EA_WRREQ_LEVEL[19] TCC_EA_ATOMIC_LEVEL[19] TCC_EA_ATOMIC[20] TCC_EA_RDREQ_LEVEL[20] TCC_EA_WRREQ_LEVEL[20] TCC_EA_ATOMIC_LEVEL[20] TCC_EA_ATOMIC[21] TCC_EA_RDREQ_LEVEL[21] TCC_EA_WRREQ_LEVEL[21] TCC_EA_ATOMIC_LEVEL[21] TCC_EA_ATOMIC[22] TCC_EA_RDREQ_LEVEL[22] TCC_EA_WRREQ_LEVEL[22] TCC_EA_ATOMIC_LEVEL[22] TCC_EA_ATOMIC[23] TCC_EA_RDREQ_LEVEL[23] TCC_EA_WRREQ_LEVEL[23] TCC_EA_ATOMIC_LEVEL[23] TCC_EA_ATOMIC[24] TCC_EA_RDREQ_LEVEL[24] TCC_EA_WRREQ_LEVEL[24] TCC_EA_ATOMIC_LEVEL[24] TCC_EA_ATOMIC[25] TCC_EA_RDREQ_LEVEL[25] TCC_EA_WRREQ_LEVEL[25] TCC_EA_ATOMIC_LEVEL[25] TCC_EA_ATOMIC[26] TCC_EA_RDREQ_LEVEL[26] TCC_EA_WRREQ_LEVEL[26] TCC_EA_ATOMIC_LEVEL[26] TCC_EA_ATOMIC[27] TCC_EA_RDREQ_LEVEL[27] TCC_EA_WRREQ_LEVEL[27] TCC_EA_ATOMIC_LEVEL[27] TCC_EA_ATOMIC[28] TCC_EA_RDREQ_LEVEL[28] TCC_EA_WRREQ_LEVEL[28] TCC_EA_ATOMIC_LEVEL[28] TCC_EA_ATOMIC[29] TCC_EA_RDREQ_LEVEL[29] TCC_EA_WRREQ_LEVEL[29] TCC_EA_ATOMIC_LEVEL[29] TCC_EA_ATOMIC[30] TCC_EA_RDREQ_LEVEL[30] TCC_EA_WRREQ_LEVEL[30] TCC_EA_ATOMIC_LEVEL[30] TCC_EA_ATOMIC[31] TCC_EA_RDREQ_LEVEL[31] TCC_EA_WRREQ_LEVEL[31] TCC_EA_ATOMIC_LEVEL[31] CPC_CPC_UTCL2IU_BUSY CPC_CPC_UTCL2IU_IDLE CPF_CMP_UTCL1_STALL_ON_TRANSLATION SPI_RA_RES_STALL_CSN SPI_RA_TMP_STALL_CSN + +gpu: +range: +kernel: diff --git a/tests/workloads/multikernel/MI200/perfmon/SQ_LEVEL_WAVES.txt b/tests/workloads/multikernel/MI200/perfmon/SQ_LEVEL_WAVES.txt new file mode 100644 index 000000000..91c1452d0 --- /dev/null +++ b/tests/workloads/multikernel/MI200/perfmon/SQ_LEVEL_WAVES.txt @@ -0,0 +1,5 @@ +pmc: SQ_CYCLES SQ_WAVES SQ_WAVE_CYCLES SQ_BUSY_CYCLES SQ_LEVEL_WAVES SQ_ACCUM_PREV_HIRES SQ_INSTS_VALU_TRANS_F64 SQ_INSTS_VALU_INT32 TA_ADDR_STALLED_BY_TC_CYCLES_sum TA_TOTAL_WAVEFRONTS_sum TCP_UTCL1_TRANSLATION_MISS_sum TCP_UTCL1_TRANSLATION_HIT_sum TCP_UTCL1_PERMISSION_MISS_sum TCP_UTCL1_REQUEST_sum TCC_EA_RDREQ_IO_CREDIT_STALL[0] TCC_EA_RDREQ_GMI_CREDIT_STALL[0] TCC_EA_RDREQ_DRAM_CREDIT_STALL[0] TCC_EA_WRREQ_IO_CREDIT_STALL[0] TCC_EA_RDREQ_IO_CREDIT_STALL[1] TCC_EA_RDREQ_GMI_CREDIT_STALL[1] TCC_EA_RDREQ_DRAM_CREDIT_STALL[1] TCC_EA_WRREQ_IO_CREDIT_STALL[1] TCC_EA_RDREQ_IO_CREDIT_STALL[2] TCC_EA_RDREQ_GMI_CREDIT_STALL[2] TCC_EA_RDREQ_DRAM_CREDIT_STALL[2] TCC_EA_WRREQ_IO_CREDIT_STALL[2] TCC_EA_RDREQ_IO_CREDIT_STALL[3] TCC_EA_RDREQ_GMI_CREDIT_STALL[3] TCC_EA_RDREQ_DRAM_CREDIT_STALL[3] TCC_EA_WRREQ_IO_CREDIT_STALL[3] TCC_EA_RDREQ_IO_CREDIT_STALL[4] TCC_EA_RDREQ_GMI_CREDIT_STALL[4] TCC_EA_RDREQ_DRAM_CREDIT_STALL[4] TCC_EA_WRREQ_IO_CREDIT_STALL[4] TCC_EA_RDREQ_IO_CREDIT_STALL[5] TCC_EA_RDREQ_GMI_CREDIT_STALL[5] TCC_EA_RDREQ_DRAM_CREDIT_STALL[5] TCC_EA_WRREQ_IO_CREDIT_STALL[5] TCC_EA_RDREQ_IO_CREDIT_STALL[6] TCC_EA_RDREQ_GMI_CREDIT_STALL[6] TCC_EA_RDREQ_DRAM_CREDIT_STALL[6] TCC_EA_WRREQ_IO_CREDIT_STALL[6] TCC_EA_RDREQ_IO_CREDIT_STALL[7] TCC_EA_RDREQ_GMI_CREDIT_STALL[7] TCC_EA_RDREQ_DRAM_CREDIT_STALL[7] TCC_EA_WRREQ_IO_CREDIT_STALL[7] TCC_EA_RDREQ_IO_CREDIT_STALL[8] TCC_EA_RDREQ_GMI_CREDIT_STALL[8] TCC_EA_RDREQ_DRAM_CREDIT_STALL[8] TCC_EA_WRREQ_IO_CREDIT_STALL[8] TCC_EA_RDREQ_IO_CREDIT_STALL[9] TCC_EA_RDREQ_GMI_CREDIT_STALL[9] TCC_EA_RDREQ_DRAM_CREDIT_STALL[9] TCC_EA_WRREQ_IO_CREDIT_STALL[9] TCC_EA_RDREQ_IO_CREDIT_STALL[10] TCC_EA_RDREQ_GMI_CREDIT_STALL[10] TCC_EA_RDREQ_DRAM_CREDIT_STALL[10] TCC_EA_WRREQ_IO_CREDIT_STALL[10] TCC_EA_RDREQ_IO_CREDIT_STALL[11] TCC_EA_RDREQ_GMI_CREDIT_STALL[11] TCC_EA_RDREQ_DRAM_CREDIT_STALL[11] TCC_EA_WRREQ_IO_CREDIT_STALL[11] TCC_EA_RDREQ_IO_CREDIT_STALL[12] TCC_EA_RDREQ_GMI_CREDIT_STALL[12] TCC_EA_RDREQ_DRAM_CREDIT_STALL[12] TCC_EA_WRREQ_IO_CREDIT_STALL[12] TCC_EA_RDREQ_IO_CREDIT_STALL[13] TCC_EA_RDREQ_GMI_CREDIT_STALL[13] TCC_EA_RDREQ_DRAM_CREDIT_STALL[13] TCC_EA_WRREQ_IO_CREDIT_STALL[13] TCC_EA_RDREQ_IO_CREDIT_STALL[14] TCC_EA_RDREQ_GMI_CREDIT_STALL[14] TCC_EA_RDREQ_DRAM_CREDIT_STALL[14] TCC_EA_WRREQ_IO_CREDIT_STALL[14] TCC_EA_RDREQ_IO_CREDIT_STALL[15] TCC_EA_RDREQ_GMI_CREDIT_STALL[15] TCC_EA_RDREQ_DRAM_CREDIT_STALL[15] TCC_EA_WRREQ_IO_CREDIT_STALL[15] TCC_EA_RDREQ_IO_CREDIT_STALL[16] TCC_EA_RDREQ_GMI_CREDIT_STALL[16] TCC_EA_RDREQ_DRAM_CREDIT_STALL[16] TCC_EA_WRREQ_IO_CREDIT_STALL[16] TCC_EA_RDREQ_IO_CREDIT_STALL[17] TCC_EA_RDREQ_GMI_CREDIT_STALL[17] TCC_EA_RDREQ_DRAM_CREDIT_STALL[17] TCC_EA_WRREQ_IO_CREDIT_STALL[17] TCC_EA_RDREQ_IO_CREDIT_STALL[18] TCC_EA_RDREQ_GMI_CREDIT_STALL[18] TCC_EA_RDREQ_DRAM_CREDIT_STALL[18] TCC_EA_WRREQ_IO_CREDIT_STALL[18] TCC_EA_RDREQ_IO_CREDIT_STALL[19] TCC_EA_RDREQ_GMI_CREDIT_STALL[19] TCC_EA_RDREQ_DRAM_CREDIT_STALL[19] TCC_EA_WRREQ_IO_CREDIT_STALL[19] TCC_EA_RDREQ_IO_CREDIT_STALL[20] TCC_EA_RDREQ_GMI_CREDIT_STALL[20] TCC_EA_RDREQ_DRAM_CREDIT_STALL[20] TCC_EA_WRREQ_IO_CREDIT_STALL[20] TCC_EA_RDREQ_IO_CREDIT_STALL[21] TCC_EA_RDREQ_GMI_CREDIT_STALL[21] TCC_EA_RDREQ_DRAM_CREDIT_STALL[21] TCC_EA_WRREQ_IO_CREDIT_STALL[21] TCC_EA_RDREQ_IO_CREDIT_STALL[22] TCC_EA_RDREQ_GMI_CREDIT_STALL[22] TCC_EA_RDREQ_DRAM_CREDIT_STALL[22] TCC_EA_WRREQ_IO_CREDIT_STALL[22] TCC_EA_RDREQ_IO_CREDIT_STALL[23] TCC_EA_RDREQ_GMI_CREDIT_STALL[23] TCC_EA_RDREQ_DRAM_CREDIT_STALL[23] TCC_EA_WRREQ_IO_CREDIT_STALL[23] TCC_EA_RDREQ_IO_CREDIT_STALL[24] TCC_EA_RDREQ_GMI_CREDIT_STALL[24] TCC_EA_RDREQ_DRAM_CREDIT_STALL[24] TCC_EA_WRREQ_IO_CREDIT_STALL[24] TCC_EA_RDREQ_IO_CREDIT_STALL[25] TCC_EA_RDREQ_GMI_CREDIT_STALL[25] TCC_EA_RDREQ_DRAM_CREDIT_STALL[25] TCC_EA_WRREQ_IO_CREDIT_STALL[25] TCC_EA_RDREQ_IO_CREDIT_STALL[26] TCC_EA_RDREQ_GMI_CREDIT_STALL[26] TCC_EA_RDREQ_DRAM_CREDIT_STALL[26] TCC_EA_WRREQ_IO_CREDIT_STALL[26] TCC_EA_RDREQ_IO_CREDIT_STALL[27] TCC_EA_RDREQ_GMI_CREDIT_STALL[27] TCC_EA_RDREQ_DRAM_CREDIT_STALL[27] TCC_EA_WRREQ_IO_CREDIT_STALL[27] TCC_EA_RDREQ_IO_CREDIT_STALL[28] TCC_EA_RDREQ_GMI_CREDIT_STALL[28] TCC_EA_RDREQ_DRAM_CREDIT_STALL[28] TCC_EA_WRREQ_IO_CREDIT_STALL[28] TCC_EA_RDREQ_IO_CREDIT_STALL[29] TCC_EA_RDREQ_GMI_CREDIT_STALL[29] TCC_EA_RDREQ_DRAM_CREDIT_STALL[29] TCC_EA_WRREQ_IO_CREDIT_STALL[29] TCC_EA_RDREQ_IO_CREDIT_STALL[30] TCC_EA_RDREQ_GMI_CREDIT_STALL[30] TCC_EA_RDREQ_DRAM_CREDIT_STALL[30] TCC_EA_WRREQ_IO_CREDIT_STALL[30] TCC_EA_RDREQ_IO_CREDIT_STALL[31] TCC_EA_RDREQ_GMI_CREDIT_STALL[31] TCC_EA_RDREQ_DRAM_CREDIT_STALL[31] TCC_EA_WRREQ_IO_CREDIT_STALL[31] CPC_ME1_BUSY_FOR_PACKET_DECODE CPC_CPC_UTCL2IU_STALL SPI_RA_WAVE_SIMD_FULL_CSN SPI_RA_VGPR_SIMD_FULL_CSN GRBM_COUNT GRBM_GUI_ACTIVE + +gpu: +range: +kernel: diff --git a/tests/workloads/multikernel/MI200/perfmon/pmc_perf_0.txt b/tests/workloads/multikernel/MI200/perfmon/pmc_perf_0.txt new file mode 100644 index 000000000..157b0fa30 --- /dev/null +++ b/tests/workloads/multikernel/MI200/perfmon/pmc_perf_0.txt @@ -0,0 +1,5 @@ +pmc: SQ_INSTS_VALU_INT64 SQ_INSTS_FLAT SQ_INSTS_GDS SQ_INSTS_EXP_GDS SQ_INSTS_BRANCH SQ_INSTS_SENDMSG SQ_WAIT_ANY SQ_WAIT_INST_ANY TA_ADDR_STALLED_BY_TD_CYCLES_sum TA_DATA_STALLED_BY_TC_CYCLES_sum TCP_TCP_LATENCY_sum TCP_TCC_READ_REQ_LATENCY_sum TCP_TCC_WRITE_REQ_LATENCY_sum TCP_TCC_READ_REQ_sum TCC_EA_WRREQ_GMI_CREDIT_STALL[0] TCC_EA_WRREQ_DRAM_CREDIT_STALL[0] TCC_TOO_MANY_EA_WRREQS_STALL[0] TCC_EA_WRREQ_GMI_CREDIT_STALL[1] TCC_EA_WRREQ_DRAM_CREDIT_STALL[1] TCC_TOO_MANY_EA_WRREQS_STALL[1] TCC_EA_WRREQ_GMI_CREDIT_STALL[2] TCC_EA_WRREQ_DRAM_CREDIT_STALL[2] TCC_TOO_MANY_EA_WRREQS_STALL[2] TCC_EA_WRREQ_GMI_CREDIT_STALL[3] TCC_EA_WRREQ_DRAM_CREDIT_STALL[3] TCC_TOO_MANY_EA_WRREQS_STALL[3] TCC_EA_WRREQ_GMI_CREDIT_STALL[4] TCC_EA_WRREQ_DRAM_CREDIT_STALL[4] TCC_TOO_MANY_EA_WRREQS_STALL[4] TCC_EA_WRREQ_GMI_CREDIT_STALL[5] TCC_EA_WRREQ_DRAM_CREDIT_STALL[5] TCC_TOO_MANY_EA_WRREQS_STALL[5] TCC_EA_WRREQ_GMI_CREDIT_STALL[6] TCC_EA_WRREQ_DRAM_CREDIT_STALL[6] TCC_TOO_MANY_EA_WRREQS_STALL[6] TCC_EA_WRREQ_GMI_CREDIT_STALL[7] TCC_EA_WRREQ_DRAM_CREDIT_STALL[7] TCC_TOO_MANY_EA_WRREQS_STALL[7] TCC_EA_WRREQ_GMI_CREDIT_STALL[8] TCC_EA_WRREQ_DRAM_CREDIT_STALL[8] TCC_TOO_MANY_EA_WRREQS_STALL[8] TCC_EA_WRREQ_GMI_CREDIT_STALL[9] TCC_EA_WRREQ_DRAM_CREDIT_STALL[9] TCC_TOO_MANY_EA_WRREQS_STALL[9] TCC_EA_WRREQ_GMI_CREDIT_STALL[10] TCC_EA_WRREQ_DRAM_CREDIT_STALL[10] TCC_TOO_MANY_EA_WRREQS_STALL[10] TCC_EA_WRREQ_GMI_CREDIT_STALL[11] TCC_EA_WRREQ_DRAM_CREDIT_STALL[11] TCC_TOO_MANY_EA_WRREQS_STALL[11] TCC_EA_WRREQ_GMI_CREDIT_STALL[12] TCC_EA_WRREQ_DRAM_CREDIT_STALL[12] TCC_TOO_MANY_EA_WRREQS_STALL[12] TCC_EA_WRREQ_GMI_CREDIT_STALL[13] TCC_EA_WRREQ_DRAM_CREDIT_STALL[13] TCC_TOO_MANY_EA_WRREQS_STALL[13] TCC_EA_WRREQ_GMI_CREDIT_STALL[14] TCC_EA_WRREQ_DRAM_CREDIT_STALL[14] TCC_TOO_MANY_EA_WRREQS_STALL[14] TCC_EA_WRREQ_GMI_CREDIT_STALL[15] TCC_EA_WRREQ_DRAM_CREDIT_STALL[15] TCC_TOO_MANY_EA_WRREQS_STALL[15] TCC_EA_WRREQ_GMI_CREDIT_STALL[16] TCC_EA_WRREQ_DRAM_CREDIT_STALL[16] TCC_TOO_MANY_EA_WRREQS_STALL[16] TCC_EA_WRREQ_GMI_CREDIT_STALL[17] TCC_EA_WRREQ_DRAM_CREDIT_STALL[17] TCC_TOO_MANY_EA_WRREQS_STALL[17] TCC_EA_WRREQ_GMI_CREDIT_STALL[18] TCC_EA_WRREQ_DRAM_CREDIT_STALL[18] TCC_TOO_MANY_EA_WRREQS_STALL[18] TCC_EA_WRREQ_GMI_CREDIT_STALL[19] TCC_EA_WRREQ_DRAM_CREDIT_STALL[19] TCC_TOO_MANY_EA_WRREQS_STALL[19] TCC_EA_WRREQ_GMI_CREDIT_STALL[20] TCC_EA_WRREQ_DRAM_CREDIT_STALL[20] TCC_TOO_MANY_EA_WRREQS_STALL[20] TCC_EA_WRREQ_GMI_CREDIT_STALL[21] TCC_EA_WRREQ_DRAM_CREDIT_STALL[21] TCC_TOO_MANY_EA_WRREQS_STALL[21] TCC_EA_WRREQ_GMI_CREDIT_STALL[22] TCC_EA_WRREQ_DRAM_CREDIT_STALL[22] TCC_TOO_MANY_EA_WRREQS_STALL[22] TCC_EA_WRREQ_GMI_CREDIT_STALL[23] TCC_EA_WRREQ_DRAM_CREDIT_STALL[23] TCC_TOO_MANY_EA_WRREQS_STALL[23] TCC_EA_WRREQ_GMI_CREDIT_STALL[24] TCC_EA_WRREQ_DRAM_CREDIT_STALL[24] TCC_TOO_MANY_EA_WRREQS_STALL[24] TCC_EA_WRREQ_GMI_CREDIT_STALL[25] TCC_EA_WRREQ_DRAM_CREDIT_STALL[25] TCC_TOO_MANY_EA_WRREQS_STALL[25] TCC_EA_WRREQ_GMI_CREDIT_STALL[26] TCC_EA_WRREQ_DRAM_CREDIT_STALL[26] TCC_TOO_MANY_EA_WRREQS_STALL[26] TCC_EA_WRREQ_GMI_CREDIT_STALL[27] TCC_EA_WRREQ_DRAM_CREDIT_STALL[27] TCC_TOO_MANY_EA_WRREQS_STALL[27] TCC_EA_WRREQ_GMI_CREDIT_STALL[28] TCC_EA_WRREQ_DRAM_CREDIT_STALL[28] TCC_TOO_MANY_EA_WRREQS_STALL[28] TCC_EA_WRREQ_GMI_CREDIT_STALL[29] TCC_EA_WRREQ_DRAM_CREDIT_STALL[29] TCC_TOO_MANY_EA_WRREQS_STALL[29] TCC_EA_WRREQ_GMI_CREDIT_STALL[30] TCC_EA_WRREQ_DRAM_CREDIT_STALL[30] TCC_TOO_MANY_EA_WRREQS_STALL[30] TCC_EA_WRREQ_GMI_CREDIT_STALL[31] TCC_EA_WRREQ_DRAM_CREDIT_STALL[31] TCC_TOO_MANY_EA_WRREQS_STALL[31] TCC_CYCLE_sum CPC_ME1_DC0_SPI_BUSY SPI_RA_SGPR_SIMD_FULL_CSN SPI_RA_LDS_CU_FULL_CSN + +gpu: +range: +kernel: diff --git a/tests/workloads/multikernel/MI200/perfmon/pmc_perf_1.txt b/tests/workloads/multikernel/MI200/perfmon/pmc_perf_1.txt new file mode 100644 index 000000000..b621588d2 --- /dev/null +++ b/tests/workloads/multikernel/MI200/perfmon/pmc_perf_1.txt @@ -0,0 +1,5 @@ +pmc: SQ_ACTIVE_INST_ANY SQ_ACTIVE_INST_VMEM SQ_ACTIVE_INST_LDS SQ_ACTIVE_INST_VALU SQ_ACTIVE_INST_SCA SQ_ACTIVE_INST_EXP_GDS SQ_ACTIVE_INST_MISC SQ_ACTIVE_INST_FLAT TA_FLAT_WAVEFRONTS_sum TA_FLAT_READ_WAVEFRONTS_sum TCP_TCC_WRITE_REQ_sum TCP_TCC_ATOMIC_WITH_RET_REQ_sum TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum TCP_TCC_NC_READ_REQ_sum TCC_BUSY_sum TCC_PROBE_sum TCC_PROBE_ALL_sum TCC_NC_REQ_sum SPI_RA_BAR_CU_FULL_CSN SPI_RA_TGLIM_CU_FULL_CSN + +gpu: +range: +kernel: diff --git a/tests/workloads/multikernel/MI200/perfmon/pmc_perf_2.txt b/tests/workloads/multikernel/MI200/perfmon/pmc_perf_2.txt new file mode 100644 index 000000000..c385f7a07 --- /dev/null +++ b/tests/workloads/multikernel/MI200/perfmon/pmc_perf_2.txt @@ -0,0 +1,5 @@ +pmc: SQ_INST_CYCLES_VMEM_WR SQ_INST_CYCLES_VMEM_RD SQ_INST_CYCLES_SMEM SQ_INST_CYCLES_SALU SQ_THREAD_CYCLES_VALU SQ_LDS_BANK_CONFLICT SQ_LDS_ADDR_CONFLICT SQ_LDS_UNALIGNED_STALL TA_FLAT_WRITE_WAVEFRONTS_sum TA_FLAT_ATOMIC_WAVEFRONTS_sum TCP_TCC_NC_WRITE_REQ_sum TCP_TCC_NC_ATOMIC_REQ_sum TCP_TCC_UC_READ_REQ_sum TCP_TCC_UC_WRITE_REQ_sum TCC_UC_REQ_sum TCC_CC_REQ_sum TCC_RW_REQ_sum TCC_REQ_sum SPI_RA_WVLIM_STALL_CSN SPI_SWC_CSC_WR + +gpu: +range: +kernel: diff --git a/tests/workloads/multikernel/MI200/perfmon/pmc_perf_3.txt b/tests/workloads/multikernel/MI200/perfmon/pmc_perf_3.txt new file mode 100644 index 000000000..c201a09ec --- /dev/null +++ b/tests/workloads/multikernel/MI200/perfmon/pmc_perf_3.txt @@ -0,0 +1,5 @@ +pmc: SQ_WAVES_EQ_64 SQ_WAVES_LT_64 SQ_WAVES_LT_48 SQ_WAVES_LT_32 SQ_WAVES_LT_16 SQ_ITEMS SQ_LDS_MEM_VIOLATIONS SQ_LDS_ATOMIC_RETURN TCP_TCC_UC_ATOMIC_REQ_sum TCP_TCC_CC_READ_REQ_sum TCP_TCC_CC_WRITE_REQ_sum TCP_TCC_CC_ATOMIC_REQ_sum TCC_STREAMING_REQ_sum TCC_HIT_sum TCC_MISS_sum TCC_READ_sum SPI_VWC_CSC_WR SPI_RA_BULKY_CU_FULL_CSN + +gpu: +range: +kernel: diff --git a/tests/workloads/multikernel/MI200/perfmon/pmc_perf_4.txt b/tests/workloads/multikernel/MI200/perfmon/pmc_perf_4.txt new file mode 100644 index 000000000..dae157747 --- /dev/null +++ b/tests/workloads/multikernel/MI200/perfmon/pmc_perf_4.txt @@ -0,0 +1,5 @@ +pmc: SQ_LDS_IDX_ACTIVE SQ_WAVES_RESTORED SQ_WAVES_SAVED SQ_INSTS_SMEM_NORM SQ_INSTS_MFMA SQ_INSTS_VALU_MFMA_I8 SQ_INSTS_VALU_MFMA_F16 SQ_INSTS_VALU_MFMA_BF16 TCP_TCC_RW_READ_REQ_sum TCP_TCC_RW_WRITE_REQ_sum TCP_TCC_RW_ATOMIC_REQ_sum TCP_PENDING_STALL_CYCLES_sum TCC_WRITE_sum TCC_ATOMIC_sum TCC_WRITEBACK_sum TCC_EA_WRREQ_sum + +gpu: +range: +kernel: diff --git a/tests/workloads/multikernel/MI200/perfmon/pmc_perf_5.txt b/tests/workloads/multikernel/MI200/perfmon/pmc_perf_5.txt new file mode 100644 index 000000000..6c5efd1c9 --- /dev/null +++ b/tests/workloads/multikernel/MI200/perfmon/pmc_perf_5.txt @@ -0,0 +1,5 @@ +pmc: SQ_INSTS_VALU_MFMA_F32 SQ_INSTS_VALU_MFMA_F64 SQ_VALU_MFMA_BUSY_CYCLES SQ_INSTS_FLAT_LDS_ONLY SQ_INSTS_VALU_MFMA_MOPS_I8 SQ_INSTS_VALU_MFMA_MOPS_F16 SQ_INSTS_VALU_MFMA_MOPS_BF16 SQ_INSTS_VALU_MFMA_MOPS_F32 TCC_EA_WRREQ_64B_sum TCC_EA_WR_UNCACHED_32B_sum TCC_EA_WRREQ_DRAM_sum TCC_EA_WRREQ_STALL_sum + +gpu: +range: +kernel: diff --git a/tests/workloads/multikernel/MI200/perfmon/pmc_perf_6.txt b/tests/workloads/multikernel/MI200/perfmon/pmc_perf_6.txt new file mode 100644 index 000000000..512ffbd89 --- /dev/null +++ b/tests/workloads/multikernel/MI200/perfmon/pmc_perf_6.txt @@ -0,0 +1,5 @@ +pmc: SQ_INSTS_VALU_MFMA_MOPS_F64 SQC_TC_INST_REQ SQC_TC_DATA_READ_REQ SQC_TC_DATA_WRITE_REQ SQC_TC_DATA_ATOMIC_REQ SQC_TC_STALL SQC_TC_REQ SQC_DCACHE_REQ_READ_16 TCC_EA_RDREQ_sum TCC_EA_RDREQ_32B_sum TCC_EA_RD_UNCACHED_32B_sum TCC_EA_RDREQ_DRAM_sum + +gpu: +range: +kernel: diff --git a/tests/workloads/multikernel/MI200/perfmon/pmc_perf_7.txt b/tests/workloads/multikernel/MI200/perfmon/pmc_perf_7.txt new file mode 100644 index 000000000..e7479b0f4 --- /dev/null +++ b/tests/workloads/multikernel/MI200/perfmon/pmc_perf_7.txt @@ -0,0 +1,5 @@ +pmc: SQC_ICACHE_REQ SQC_ICACHE_HITS SQC_ICACHE_MISSES SQC_ICACHE_MISSES_DUPLICATE SQC_DCACHE_INPUT_VALID_READYB SQC_DCACHE_ATOMIC SQC_DCACHE_REQ_READ_8 SQC_DCACHE_REQ TCC_TAG_STALL_sum TCC_NORMAL_WRITEBACK_sum TCC_ALL_TC_OP_WB_WRITEBACK_sum TCC_NORMAL_EVICT_sum + +gpu: +range: +kernel: diff --git a/tests/workloads/multikernel/MI200/perfmon/pmc_perf_8.txt b/tests/workloads/multikernel/MI200/perfmon/pmc_perf_8.txt new file mode 100644 index 000000000..4afa81372 --- /dev/null +++ b/tests/workloads/multikernel/MI200/perfmon/pmc_perf_8.txt @@ -0,0 +1,5 @@ +pmc: SQC_DCACHE_HITS SQC_DCACHE_MISSES SQC_DCACHE_MISSES_DUPLICATE SQC_DCACHE_REQ_READ_1 SQC_DCACHE_REQ_READ_2 SQC_DCACHE_REQ_READ_4 TCC_ALL_TC_OP_INV_EVICT_sum TCC_TOO_MANY_EA_WRREQS_STALL_sum TCC_EA_ATOMIC_sum TCC_EA_RDREQ_LEVEL_sum + +gpu: +range: +kernel: diff --git a/tests/workloads/multikernel/MI200/perfmon/pmc_perf_9.txt b/tests/workloads/multikernel/MI200/perfmon/pmc_perf_9.txt new file mode 100644 index 000000000..5b7d3f852 --- /dev/null +++ b/tests/workloads/multikernel/MI200/perfmon/pmc_perf_9.txt @@ -0,0 +1,5 @@ +pmc: TCC_EA_WRREQ_LEVEL_sum TCC_EA_ATOMIC_LEVEL_sum + +gpu: +range: +kernel: diff --git a/tests/workloads/multikernel/MI200/perfmon/timestamps.txt b/tests/workloads/multikernel/MI200/perfmon/timestamps.txt new file mode 100644 index 000000000..676cca1b8 --- /dev/null +++ b/tests/workloads/multikernel/MI200/perfmon/timestamps.txt @@ -0,0 +1,5 @@ +pmc: + +gpu: +range: +kernel: diff --git a/tests/workloads/multikernel/MI200/pmc_dispatch_info.csv b/tests/workloads/multikernel/MI200/pmc_dispatch_info.csv new file mode 100644 index 000000000..4c8414487 --- /dev/null +++ b/tests/workloads/multikernel/MI200/pmc_dispatch_info.csv @@ -0,0 +1,8 @@ +Dispatch_ID,Kernel_Name,GPU_ID +0,__amd_rocclr_fillBufferAligned.kd,2 +1,"global_write(int*, int) [clone .kd]",2 +2,"generic_write(int*, int, int) [clone .kd]",2 +3,"global_read(int*, int) [clone .kd]",2 +4,"generic_read(int*, int, int) [clone .kd]",2 +5,"global_atomic(int*, int) [clone .kd]",2 +6,"generic_atomic(int*, int, int) [clone .kd]",2 diff --git a/tests/workloads/multikernel/MI200/pmc_perf.csv b/tests/workloads/multikernel/MI200/pmc_perf.csv new file mode 100644 index 000000000..b50d5c38f --- /dev/null +++ b/tests/workloads/multikernel/MI200/pmc_perf.csv @@ -0,0 +1,8 @@ +Dispatch_ID,Kernel_Name,GPU_ID,Grid_Size,Workgroup_Size,LDS_Per_Workgroup,Scratch_Per_Workitem,Arch_VGPR,Accum_VGPR,SGPR,wave_size,obj,SQ_LDS_IDX_ACTIVE,SQ_WAVES_RESTORED,SQ_WAVES_SAVED,SQ_INSTS_SMEM_NORM,SQ_INSTS_MFMA,SQ_INSTS_VALU_MFMA_I8,SQ_INSTS_VALU_MFMA_F16,SQ_INSTS_VALU_MFMA_BF16,TCP_TCC_RW_READ_REQ_sum,TCP_TCC_RW_WRITE_REQ_sum,TCP_TCC_RW_ATOMIC_REQ_sum,TCP_PENDING_STALL_CYCLES_sum,TCC_WRITE_sum,TCC_ATOMIC_sum,TCC_WRITEBACK_sum,TCC_EA_WRREQ_sum,wave_size_1,obj_1,TCC_EA_WRREQ_LEVEL_sum,TCC_EA_ATOMIC_LEVEL_sum,wave_size_2,obj_2,SQ_ACTIVE_INST_ANY,SQ_ACTIVE_INST_VMEM,SQ_ACTIVE_INST_LDS,SQ_ACTIVE_INST_VALU,SQ_ACTIVE_INST_SCA,SQ_ACTIVE_INST_EXP_GDS,SQ_ACTIVE_INST_MISC,SQ_ACTIVE_INST_FLAT,TA_FLAT_WAVEFRONTS_sum,TA_FLAT_READ_WAVEFRONTS_sum,TCP_TCC_WRITE_REQ_sum,TCP_TCC_ATOMIC_WITH_RET_REQ_sum,TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum,TCP_TCC_NC_READ_REQ_sum,TCC_BUSY_sum,TCC_PROBE_sum,TCC_PROBE_ALL_sum,TCC_NC_REQ_sum,SPI_RA_BAR_CU_FULL_CSN,SPI_RA_TGLIM_CU_FULL_CSN,wave_size_3,obj_3,SQ_INST_CYCLES_VMEM_WR,SQ_INST_CYCLES_VMEM_RD,SQ_INST_CYCLES_SMEM,SQ_INST_CYCLES_SALU,SQ_THREAD_CYCLES_VALU,SQ_LDS_BANK_CONFLICT,SQ_LDS_ADDR_CONFLICT,SQ_LDS_UNALIGNED_STALL,TA_FLAT_WRITE_WAVEFRONTS_sum,TA_FLAT_ATOMIC_WAVEFRONTS_sum,TCP_TCC_NC_WRITE_REQ_sum,TCP_TCC_NC_ATOMIC_REQ_sum,TCP_TCC_UC_READ_REQ_sum,TCP_TCC_UC_WRITE_REQ_sum,TCC_UC_REQ_sum,TCC_CC_REQ_sum,TCC_RW_REQ_sum,TCC_REQ_sum,SPI_RA_WVLIM_STALL_CSN,SPI_SWC_CSC_WR,wave_size_4,obj_4,SQ_WAVES_EQ_64,SQ_WAVES_LT_64,SQ_WAVES_LT_48,SQ_WAVES_LT_32,SQ_WAVES_LT_16,SQ_ITEMS,SQ_LDS_MEM_VIOLATIONS,SQ_LDS_ATOMIC_RETURN,TCP_TCC_UC_ATOMIC_REQ_sum,TCP_TCC_CC_READ_REQ_sum,TCP_TCC_CC_WRITE_REQ_sum,TCP_TCC_CC_ATOMIC_REQ_sum,TCC_STREAMING_REQ_sum,TCC_HIT_sum,TCC_MISS_sum,TCC_READ_sum,SPI_VWC_CSC_WR,SPI_RA_BULKY_CU_FULL_CSN,wave_size_5,obj_5,SQC_ICACHE_REQ,SQC_ICACHE_HITS,SQC_ICACHE_MISSES,SQC_ICACHE_MISSES_DUPLICATE,SQC_DCACHE_INPUT_VALID_READYB,SQC_DCACHE_ATOMIC,SQC_DCACHE_REQ_READ_8,SQC_DCACHE_REQ,TCC_TAG_STALL_sum,TCC_NORMAL_WRITEBACK_sum,TCC_ALL_TC_OP_WB_WRITEBACK_sum,TCC_NORMAL_EVICT_sum,wave_size_6,obj_6,SQ_INSTS_VALU_INT64,SQ_INSTS_FLAT,SQ_INSTS_GDS,SQ_INSTS_EXP_GDS,SQ_INSTS_BRANCH,SQ_INSTS_SENDMSG,SQ_WAIT_ANY,SQ_WAIT_INST_ANY,TA_ADDR_STALLED_BY_TD_CYCLES_sum,TA_DATA_STALLED_BY_TC_CYCLES_sum,TCP_TCP_LATENCY_sum,TCP_TCC_READ_REQ_LATENCY_sum,TCP_TCC_WRITE_REQ_LATENCY_sum,TCP_TCC_READ_REQ_sum,TCC_EA_WRREQ_GMI_CREDIT_STALL[0],TCC_EA_WRREQ_DRAM_CREDIT_STALL[0],TCC_TOO_MANY_EA_WRREQS_STALL[0],TCC_EA_WRREQ_GMI_CREDIT_STALL[1],TCC_EA_WRREQ_DRAM_CREDIT_STALL[1],TCC_TOO_MANY_EA_WRREQS_STALL[1],TCC_EA_WRREQ_GMI_CREDIT_STALL[2],TCC_EA_WRREQ_DRAM_CREDIT_STALL[2],TCC_TOO_MANY_EA_WRREQS_STALL[2],TCC_EA_WRREQ_GMI_CREDIT_STALL[3],TCC_EA_WRREQ_DRAM_CREDIT_STALL[3],TCC_TOO_MANY_EA_WRREQS_STALL[3],TCC_EA_WRREQ_GMI_CREDIT_STALL[4],TCC_EA_WRREQ_DRAM_CREDIT_STALL[4],TCC_TOO_MANY_EA_WRREQS_STALL[4],TCC_EA_WRREQ_GMI_CREDIT_STALL[5],TCC_EA_WRREQ_DRAM_CREDIT_STALL[5],TCC_TOO_MANY_EA_WRREQS_STALL[5],TCC_EA_WRREQ_GMI_CREDIT_STALL[6],TCC_EA_WRREQ_DRAM_CREDIT_STALL[6],TCC_TOO_MANY_EA_WRREQS_STALL[6],TCC_EA_WRREQ_GMI_CREDIT_STALL[7],TCC_EA_WRREQ_DRAM_CREDIT_STALL[7],TCC_TOO_MANY_EA_WRREQS_STALL[7],TCC_EA_WRREQ_GMI_CREDIT_STALL[8],TCC_EA_WRREQ_DRAM_CREDIT_STALL[8],TCC_TOO_MANY_EA_WRREQS_STALL[8],TCC_EA_WRREQ_GMI_CREDIT_STALL[9],TCC_EA_WRREQ_DRAM_CREDIT_STALL[9],TCC_TOO_MANY_EA_WRREQS_STALL[9],TCC_EA_WRREQ_GMI_CREDIT_STALL[10],TCC_EA_WRREQ_DRAM_CREDIT_STALL[10],TCC_TOO_MANY_EA_WRREQS_STALL[10],TCC_EA_WRREQ_GMI_CREDIT_STALL[11],TCC_EA_WRREQ_DRAM_CREDIT_STALL[11],TCC_TOO_MANY_EA_WRREQS_STALL[11],TCC_EA_WRREQ_GMI_CREDIT_STALL[12],TCC_EA_WRREQ_DRAM_CREDIT_STALL[12],TCC_TOO_MANY_EA_WRREQS_STALL[12],TCC_EA_WRREQ_GMI_CREDIT_STALL[13],TCC_EA_WRREQ_DRAM_CREDIT_STALL[13],TCC_TOO_MANY_EA_WRREQS_STALL[13],TCC_EA_WRREQ_GMI_CREDIT_STALL[14],TCC_EA_WRREQ_DRAM_CREDIT_STALL[14],TCC_TOO_MANY_EA_WRREQS_STALL[14],TCC_EA_WRREQ_GMI_CREDIT_STALL[15],TCC_EA_WRREQ_DRAM_CREDIT_STALL[15],TCC_TOO_MANY_EA_WRREQS_STALL[15],TCC_EA_WRREQ_GMI_CREDIT_STALL[16],TCC_EA_WRREQ_DRAM_CREDIT_STALL[16],TCC_TOO_MANY_EA_WRREQS_STALL[16],TCC_EA_WRREQ_GMI_CREDIT_STALL[17],TCC_EA_WRREQ_DRAM_CREDIT_STALL[17],TCC_TOO_MANY_EA_WRREQS_STALL[17],TCC_EA_WRREQ_GMI_CREDIT_STALL[18],TCC_EA_WRREQ_DRAM_CREDIT_STALL[18],TCC_TOO_MANY_EA_WRREQS_STALL[18],TCC_EA_WRREQ_GMI_CREDIT_STALL[19],TCC_EA_WRREQ_DRAM_CREDIT_STALL[19],TCC_TOO_MANY_EA_WRREQS_STALL[19],TCC_EA_WRREQ_GMI_CREDIT_STALL[20],TCC_EA_WRREQ_DRAM_CREDIT_STALL[20],TCC_TOO_MANY_EA_WRREQS_STALL[20],TCC_EA_WRREQ_GMI_CREDIT_STALL[21],TCC_EA_WRREQ_DRAM_CREDIT_STALL[21],TCC_TOO_MANY_EA_WRREQS_STALL[21],TCC_EA_WRREQ_GMI_CREDIT_STALL[22],TCC_EA_WRREQ_DRAM_CREDIT_STALL[22],TCC_TOO_MANY_EA_WRREQS_STALL[22],TCC_EA_WRREQ_GMI_CREDIT_STALL[23],TCC_EA_WRREQ_DRAM_CREDIT_STALL[23],TCC_TOO_MANY_EA_WRREQS_STALL[23],TCC_EA_WRREQ_GMI_CREDIT_STALL[24],TCC_EA_WRREQ_DRAM_CREDIT_STALL[24],TCC_TOO_MANY_EA_WRREQS_STALL[24],TCC_EA_WRREQ_GMI_CREDIT_STALL[25],TCC_EA_WRREQ_DRAM_CREDIT_STALL[25],TCC_TOO_MANY_EA_WRREQS_STALL[25],TCC_EA_WRREQ_GMI_CREDIT_STALL[26],TCC_EA_WRREQ_DRAM_CREDIT_STALL[26],TCC_TOO_MANY_EA_WRREQS_STALL[26],TCC_EA_WRREQ_GMI_CREDIT_STALL[27],TCC_EA_WRREQ_DRAM_CREDIT_STALL[27],TCC_TOO_MANY_EA_WRREQS_STALL[27],TCC_EA_WRREQ_GMI_CREDIT_STALL[28],TCC_EA_WRREQ_DRAM_CREDIT_STALL[28],TCC_TOO_MANY_EA_WRREQS_STALL[28],TCC_EA_WRREQ_GMI_CREDIT_STALL[29],TCC_EA_WRREQ_DRAM_CREDIT_STALL[29],TCC_TOO_MANY_EA_WRREQS_STALL[29],TCC_EA_WRREQ_GMI_CREDIT_STALL[30],TCC_EA_WRREQ_DRAM_CREDIT_STALL[30],TCC_TOO_MANY_EA_WRREQS_STALL[30],TCC_EA_WRREQ_GMI_CREDIT_STALL[31],TCC_EA_WRREQ_DRAM_CREDIT_STALL[31],TCC_TOO_MANY_EA_WRREQS_STALL[31],TCC_CYCLE_sum,CPC_ME1_DC0_SPI_BUSY,SPI_RA_SGPR_SIMD_FULL_CSN,SPI_RA_LDS_CU_FULL_CSN,wave_size_7,obj_7,SQC_DCACHE_HITS,SQC_DCACHE_MISSES,SQC_DCACHE_MISSES_DUPLICATE,SQC_DCACHE_REQ_READ_1,SQC_DCACHE_REQ_READ_2,SQC_DCACHE_REQ_READ_4,TCC_ALL_TC_OP_INV_EVICT_sum,TCC_TOO_MANY_EA_WRREQS_STALL_sum,TCC_EA_ATOMIC_sum,TCC_EA_RDREQ_LEVEL_sum,wave_size_8,obj_8,SQ_INSTS_VALU_MFMA_MOPS_F64,SQC_TC_INST_REQ,SQC_TC_DATA_READ_REQ,SQC_TC_DATA_WRITE_REQ,SQC_TC_DATA_ATOMIC_REQ,SQC_TC_STALL,SQC_TC_REQ,SQC_DCACHE_REQ_READ_16,TCC_EA_RDREQ_sum,TCC_EA_RDREQ_32B_sum,TCC_EA_RD_UNCACHED_32B_sum,TCC_EA_RDREQ_DRAM_sum,wave_size_9,obj_9,SQ_INSTS_VALU_MFMA_F32,SQ_INSTS_VALU_MFMA_F64,SQ_VALU_MFMA_BUSY_CYCLES,SQ_INSTS_FLAT_LDS_ONLY,SQ_INSTS_VALU_MFMA_MOPS_I8,SQ_INSTS_VALU_MFMA_MOPS_F16,SQ_INSTS_VALU_MFMA_MOPS_BF16,SQ_INSTS_VALU_MFMA_MOPS_F32,TCC_EA_WRREQ_64B_sum,TCC_EA_WR_UNCACHED_32B_sum,TCC_EA_WRREQ_DRAM_sum,TCC_EA_WRREQ_STALL_sum,wave_size_10,obj_10,SQ_INSTS_SMEM,SQ_INST_LEVEL_SMEM,SQ_ACCUM_PREV_HIRES,SQ_INSTS_VALU_MUL_F16,SQ_INSTS_VALU_FMA_F16,SQ_INSTS_VALU_TRANS_F16,SQ_INSTS_VALU_ADD_F32,SQ_INSTS_VALU_MUL_F32,TA_BUFFER_ATOMIC_WAVEFRONTS_sum,TA_BUFFER_TOTAL_CYCLES_sum,TD_ATOMIC_WAVEFRONT_sum,TD_STORE_WAVEFRONT_sum,TCP_VOLATILE_sum,TCP_TOTAL_ACCESSES_sum,TCP_TOTAL_READ_sum,TCP_TOTAL_WRITE_sum,TCC_EA_RDREQ[0],TCC_EA_RDREQ_32B[0],TCC_EA_WRREQ[0],TCC_EA_WRREQ_64B[0],TCC_EA_RDREQ[1],TCC_EA_RDREQ_32B[1],TCC_EA_WRREQ[1],TCC_EA_WRREQ_64B[1],TCC_EA_RDREQ[2],TCC_EA_RDREQ_32B[2],TCC_EA_WRREQ[2],TCC_EA_WRREQ_64B[2],TCC_EA_RDREQ[3],TCC_EA_RDREQ_32B[3],TCC_EA_WRREQ[3],TCC_EA_WRREQ_64B[3],TCC_EA_RDREQ[4],TCC_EA_RDREQ_32B[4],TCC_EA_WRREQ[4],TCC_EA_WRREQ_64B[4],TCC_EA_RDREQ[5],TCC_EA_RDREQ_32B[5],TCC_EA_WRREQ[5],TCC_EA_WRREQ_64B[5],TCC_EA_RDREQ[6],TCC_EA_RDREQ_32B[6],TCC_EA_WRREQ[6],TCC_EA_WRREQ_64B[6],TCC_EA_RDREQ[7],TCC_EA_RDREQ_32B[7],TCC_EA_WRREQ[7],TCC_EA_WRREQ_64B[7],TCC_EA_RDREQ[8],TCC_EA_RDREQ_32B[8],TCC_EA_WRREQ[8],TCC_EA_WRREQ_64B[8],TCC_EA_RDREQ[9],TCC_EA_RDREQ_32B[9],TCC_EA_WRREQ[9],TCC_EA_WRREQ_64B[9],TCC_EA_RDREQ[10],TCC_EA_RDREQ_32B[10],TCC_EA_WRREQ[10],TCC_EA_WRREQ_64B[10],TCC_EA_RDREQ[11],TCC_EA_RDREQ_32B[11],TCC_EA_WRREQ[11],TCC_EA_WRREQ_64B[11],TCC_EA_RDREQ[12],TCC_EA_RDREQ_32B[12],TCC_EA_WRREQ[12],TCC_EA_WRREQ_64B[12],TCC_EA_RDREQ[13],TCC_EA_RDREQ_32B[13],TCC_EA_WRREQ[13],TCC_EA_WRREQ_64B[13],TCC_EA_RDREQ[14],TCC_EA_RDREQ_32B[14],TCC_EA_WRREQ[14],TCC_EA_WRREQ_64B[14],TCC_EA_RDREQ[15],TCC_EA_RDREQ_32B[15],TCC_EA_WRREQ[15],TCC_EA_WRREQ_64B[15],TCC_EA_RDREQ[16],TCC_EA_RDREQ_32B[16],TCC_EA_WRREQ[16],TCC_EA_WRREQ_64B[16],TCC_EA_RDREQ[17],TCC_EA_RDREQ_32B[17],TCC_EA_WRREQ[17],TCC_EA_WRREQ_64B[17],TCC_EA_RDREQ[18],TCC_EA_RDREQ_32B[18],TCC_EA_WRREQ[18],TCC_EA_WRREQ_64B[18],TCC_EA_RDREQ[19],TCC_EA_RDREQ_32B[19],TCC_EA_WRREQ[19],TCC_EA_WRREQ_64B[19],TCC_EA_RDREQ[20],TCC_EA_RDREQ_32B[20],TCC_EA_WRREQ[20],TCC_EA_WRREQ_64B[20],TCC_EA_RDREQ[21],TCC_EA_RDREQ_32B[21],TCC_EA_WRREQ[21],TCC_EA_WRREQ_64B[21],TCC_EA_RDREQ[22],TCC_EA_RDREQ_32B[22],TCC_EA_WRREQ[22],TCC_EA_WRREQ_64B[22],TCC_EA_RDREQ[23],TCC_EA_RDREQ_32B[23],TCC_EA_WRREQ[23],TCC_EA_WRREQ_64B[23],TCC_EA_RDREQ[24],TCC_EA_RDREQ_32B[24],TCC_EA_WRREQ[24],TCC_EA_WRREQ_64B[24],TCC_EA_RDREQ[25],TCC_EA_RDREQ_32B[25],TCC_EA_WRREQ[25],TCC_EA_WRREQ_64B[25],TCC_EA_RDREQ[26],TCC_EA_RDREQ_32B[26],TCC_EA_WRREQ[26],TCC_EA_WRREQ_64B[26],TCC_EA_RDREQ[27],TCC_EA_RDREQ_32B[27],TCC_EA_WRREQ[27],TCC_EA_WRREQ_64B[27],TCC_EA_RDREQ[28],TCC_EA_RDREQ_32B[28],TCC_EA_WRREQ[28],TCC_EA_WRREQ_64B[28],TCC_EA_RDREQ[29],TCC_EA_RDREQ_32B[29],TCC_EA_WRREQ[29],TCC_EA_WRREQ_64B[29],TCC_EA_RDREQ[30],TCC_EA_RDREQ_32B[30],TCC_EA_WRREQ[30],TCC_EA_WRREQ_64B[30],TCC_EA_RDREQ[31],TCC_EA_RDREQ_32B[31],TCC_EA_WRREQ[31],TCC_EA_WRREQ_64B[31],CPC_CPC_STAT_STALL,CPC_UTCL1_STALL_ON_TRANSLATION,CPF_CPF_STAT_IDLE,CPF_CPF_TCIU_IDLE,SPI_RA_REQ_NO_ALLOC,SPI_RA_REQ_NO_ALLOC_CSN,wave_size_11,obj_11,SQ_WAVES,SQ_IFETCH,SQ_IFETCH_LEVEL,SQ_ACCUM_PREV_HIRES_11,SQ_BUSY_CU_CYCLES,SQ_INSTS_VALU_CVT,SQ_INSTS_VMEM_WR,SQ_INSTS_VMEM_RD,TA_TA_BUSY_sum,TA_BUFFER_WAVEFRONTS_sum,TD_TD_BUSY_sum,TD_TC_STALL_sum,TCP_GATE_EN1_sum,TCP_GATE_EN2_sum,TCP_TD_TCP_STALL_CYCLES_sum,TCP_TCR_TCP_STALL_CYCLES_sum,TCC_CYCLE[0],TCC_RW_REQ[0],TCC_HIT[0],TCC_MISS[0],TCC_CYCLE[1],TCC_RW_REQ[1],TCC_HIT[1],TCC_MISS[1],TCC_CYCLE[2],TCC_RW_REQ[2],TCC_HIT[2],TCC_MISS[2],TCC_CYCLE[3],TCC_RW_REQ[3],TCC_HIT[3],TCC_MISS[3],TCC_CYCLE[4],TCC_RW_REQ[4],TCC_HIT[4],TCC_MISS[4],TCC_CYCLE[5],TCC_RW_REQ[5],TCC_HIT[5],TCC_MISS[5],TCC_CYCLE[6],TCC_RW_REQ[6],TCC_HIT[6],TCC_MISS[6],TCC_CYCLE[7],TCC_RW_REQ[7],TCC_HIT[7],TCC_MISS[7],TCC_CYCLE[8],TCC_RW_REQ[8],TCC_HIT[8],TCC_MISS[8],TCC_CYCLE[9],TCC_RW_REQ[9],TCC_HIT[9],TCC_MISS[9],TCC_CYCLE[10],TCC_RW_REQ[10],TCC_HIT[10],TCC_MISS[10],TCC_CYCLE[11],TCC_RW_REQ[11],TCC_HIT[11],TCC_MISS[11],TCC_CYCLE[12],TCC_RW_REQ[12],TCC_HIT[12],TCC_MISS[12],TCC_CYCLE[13],TCC_RW_REQ[13],TCC_HIT[13],TCC_MISS[13],TCC_CYCLE[14],TCC_RW_REQ[14],TCC_HIT[14],TCC_MISS[14],TCC_CYCLE[15],TCC_RW_REQ[15],TCC_HIT[15],TCC_MISS[15],TCC_CYCLE[16],TCC_RW_REQ[16],TCC_HIT[16],TCC_MISS[16],TCC_CYCLE[17],TCC_RW_REQ[17],TCC_HIT[17],TCC_MISS[17],TCC_CYCLE[18],TCC_RW_REQ[18],TCC_HIT[18],TCC_MISS[18],TCC_CYCLE[19],TCC_RW_REQ[19],TCC_HIT[19],TCC_MISS[19],TCC_CYCLE[20],TCC_RW_REQ[20],TCC_HIT[20],TCC_MISS[20],TCC_CYCLE[21],TCC_RW_REQ[21],TCC_HIT[21],TCC_MISS[21],TCC_CYCLE[22],TCC_RW_REQ[22],TCC_HIT[22],TCC_MISS[22],TCC_CYCLE[23],TCC_RW_REQ[23],TCC_HIT[23],TCC_MISS[23],TCC_CYCLE[24],TCC_RW_REQ[24],TCC_HIT[24],TCC_MISS[24],TCC_CYCLE[25],TCC_RW_REQ[25],TCC_HIT[25],TCC_MISS[25],TCC_CYCLE[26],TCC_RW_REQ[26],TCC_HIT[26],TCC_MISS[26],TCC_CYCLE[27],TCC_RW_REQ[27],TCC_HIT[27],TCC_MISS[27],TCC_CYCLE[28],TCC_RW_REQ[28],TCC_HIT[28],TCC_MISS[28],TCC_CYCLE[29],TCC_RW_REQ[29],TCC_HIT[29],TCC_MISS[29],TCC_CYCLE[30],TCC_RW_REQ[30],TCC_HIT[30],TCC_MISS[30],TCC_CYCLE[31],TCC_RW_REQ[31],TCC_HIT[31],TCC_MISS[31],CPC_CPC_STAT_BUSY,CPC_CPC_STAT_IDLE,CPF_CPF_STAT_BUSY,CPF_CPF_STAT_STALL,SPI_CSN_WINDOW_VALID,SPI_CSN_BUSY,GRBM_COUNT,GRBM_GUI_ACTIVE,wave_size_12,obj_12,SQ_CYCLES,SQ_WAVES_12,SQ_WAVE_CYCLES,SQ_BUSY_CYCLES,SQ_LEVEL_WAVES,SQ_ACCUM_PREV_HIRES_12,SQ_INSTS_VALU_TRANS_F64,SQ_INSTS_VALU_INT32,TA_ADDR_STALLED_BY_TC_CYCLES_sum,TA_TOTAL_WAVEFRONTS_sum,TCP_UTCL1_TRANSLATION_MISS_sum,TCP_UTCL1_TRANSLATION_HIT_sum,TCP_UTCL1_PERMISSION_MISS_sum,TCP_UTCL1_REQUEST_sum,TCC_EA_RDREQ_IO_CREDIT_STALL[0],TCC_EA_RDREQ_GMI_CREDIT_STALL[0],TCC_EA_RDREQ_DRAM_CREDIT_STALL[0],TCC_EA_WRREQ_IO_CREDIT_STALL[0],TCC_EA_RDREQ_IO_CREDIT_STALL[1],TCC_EA_RDREQ_GMI_CREDIT_STALL[1],TCC_EA_RDREQ_DRAM_CREDIT_STALL[1],TCC_EA_WRREQ_IO_CREDIT_STALL[1],TCC_EA_RDREQ_IO_CREDIT_STALL[2],TCC_EA_RDREQ_GMI_CREDIT_STALL[2],TCC_EA_RDREQ_DRAM_CREDIT_STALL[2],TCC_EA_WRREQ_IO_CREDIT_STALL[2],TCC_EA_RDREQ_IO_CREDIT_STALL[3],TCC_EA_RDREQ_GMI_CREDIT_STALL[3],TCC_EA_RDREQ_DRAM_CREDIT_STALL[3],TCC_EA_WRREQ_IO_CREDIT_STALL[3],TCC_EA_RDREQ_IO_CREDIT_STALL[4],TCC_EA_RDREQ_GMI_CREDIT_STALL[4],TCC_EA_RDREQ_DRAM_CREDIT_STALL[4],TCC_EA_WRREQ_IO_CREDIT_STALL[4],TCC_EA_RDREQ_IO_CREDIT_STALL[5],TCC_EA_RDREQ_GMI_CREDIT_STALL[5],TCC_EA_RDREQ_DRAM_CREDIT_STALL[5],TCC_EA_WRREQ_IO_CREDIT_STALL[5],TCC_EA_RDREQ_IO_CREDIT_STALL[6],TCC_EA_RDREQ_GMI_CREDIT_STALL[6],TCC_EA_RDREQ_DRAM_CREDIT_STALL[6],TCC_EA_WRREQ_IO_CREDIT_STALL[6],TCC_EA_RDREQ_IO_CREDIT_STALL[7],TCC_EA_RDREQ_GMI_CREDIT_STALL[7],TCC_EA_RDREQ_DRAM_CREDIT_STALL[7],TCC_EA_WRREQ_IO_CREDIT_STALL[7],TCC_EA_RDREQ_IO_CREDIT_STALL[8],TCC_EA_RDREQ_GMI_CREDIT_STALL[8],TCC_EA_RDREQ_DRAM_CREDIT_STALL[8],TCC_EA_WRREQ_IO_CREDIT_STALL[8],TCC_EA_RDREQ_IO_CREDIT_STALL[9],TCC_EA_RDREQ_GMI_CREDIT_STALL[9],TCC_EA_RDREQ_DRAM_CREDIT_STALL[9],TCC_EA_WRREQ_IO_CREDIT_STALL[9],TCC_EA_RDREQ_IO_CREDIT_STALL[10],TCC_EA_RDREQ_GMI_CREDIT_STALL[10],TCC_EA_RDREQ_DRAM_CREDIT_STALL[10],TCC_EA_WRREQ_IO_CREDIT_STALL[10],TCC_EA_RDREQ_IO_CREDIT_STALL[11],TCC_EA_RDREQ_GMI_CREDIT_STALL[11],TCC_EA_RDREQ_DRAM_CREDIT_STALL[11],TCC_EA_WRREQ_IO_CREDIT_STALL[11],TCC_EA_RDREQ_IO_CREDIT_STALL[12],TCC_EA_RDREQ_GMI_CREDIT_STALL[12],TCC_EA_RDREQ_DRAM_CREDIT_STALL[12],TCC_EA_WRREQ_IO_CREDIT_STALL[12],TCC_EA_RDREQ_IO_CREDIT_STALL[13],TCC_EA_RDREQ_GMI_CREDIT_STALL[13],TCC_EA_RDREQ_DRAM_CREDIT_STALL[13],TCC_EA_WRREQ_IO_CREDIT_STALL[13],TCC_EA_RDREQ_IO_CREDIT_STALL[14],TCC_EA_RDREQ_GMI_CREDIT_STALL[14],TCC_EA_RDREQ_DRAM_CREDIT_STALL[14],TCC_EA_WRREQ_IO_CREDIT_STALL[14],TCC_EA_RDREQ_IO_CREDIT_STALL[15],TCC_EA_RDREQ_GMI_CREDIT_STALL[15],TCC_EA_RDREQ_DRAM_CREDIT_STALL[15],TCC_EA_WRREQ_IO_CREDIT_STALL[15],TCC_EA_RDREQ_IO_CREDIT_STALL[16],TCC_EA_RDREQ_GMI_CREDIT_STALL[16],TCC_EA_RDREQ_DRAM_CREDIT_STALL[16],TCC_EA_WRREQ_IO_CREDIT_STALL[16],TCC_EA_RDREQ_IO_CREDIT_STALL[17],TCC_EA_RDREQ_GMI_CREDIT_STALL[17],TCC_EA_RDREQ_DRAM_CREDIT_STALL[17],TCC_EA_WRREQ_IO_CREDIT_STALL[17],TCC_EA_RDREQ_IO_CREDIT_STALL[18],TCC_EA_RDREQ_GMI_CREDIT_STALL[18],TCC_EA_RDREQ_DRAM_CREDIT_STALL[18],TCC_EA_WRREQ_IO_CREDIT_STALL[18],TCC_EA_RDREQ_IO_CREDIT_STALL[19],TCC_EA_RDREQ_GMI_CREDIT_STALL[19],TCC_EA_RDREQ_DRAM_CREDIT_STALL[19],TCC_EA_WRREQ_IO_CREDIT_STALL[19],TCC_EA_RDREQ_IO_CREDIT_STALL[20],TCC_EA_RDREQ_GMI_CREDIT_STALL[20],TCC_EA_RDREQ_DRAM_CREDIT_STALL[20],TCC_EA_WRREQ_IO_CREDIT_STALL[20],TCC_EA_RDREQ_IO_CREDIT_STALL[21],TCC_EA_RDREQ_GMI_CREDIT_STALL[21],TCC_EA_RDREQ_DRAM_CREDIT_STALL[21],TCC_EA_WRREQ_IO_CREDIT_STALL[21],TCC_EA_RDREQ_IO_CREDIT_STALL[22],TCC_EA_RDREQ_GMI_CREDIT_STALL[22],TCC_EA_RDREQ_DRAM_CREDIT_STALL[22],TCC_EA_WRREQ_IO_CREDIT_STALL[22],TCC_EA_RDREQ_IO_CREDIT_STALL[23],TCC_EA_RDREQ_GMI_CREDIT_STALL[23],TCC_EA_RDREQ_DRAM_CREDIT_STALL[23],TCC_EA_WRREQ_IO_CREDIT_STALL[23],TCC_EA_RDREQ_IO_CREDIT_STALL[24],TCC_EA_RDREQ_GMI_CREDIT_STALL[24],TCC_EA_RDREQ_DRAM_CREDIT_STALL[24],TCC_EA_WRREQ_IO_CREDIT_STALL[24],TCC_EA_RDREQ_IO_CREDIT_STALL[25],TCC_EA_RDREQ_GMI_CREDIT_STALL[25],TCC_EA_RDREQ_DRAM_CREDIT_STALL[25],TCC_EA_WRREQ_IO_CREDIT_STALL[25],TCC_EA_RDREQ_IO_CREDIT_STALL[26],TCC_EA_RDREQ_GMI_CREDIT_STALL[26],TCC_EA_RDREQ_DRAM_CREDIT_STALL[26],TCC_EA_WRREQ_IO_CREDIT_STALL[26],TCC_EA_RDREQ_IO_CREDIT_STALL[27],TCC_EA_RDREQ_GMI_CREDIT_STALL[27],TCC_EA_RDREQ_DRAM_CREDIT_STALL[27],TCC_EA_WRREQ_IO_CREDIT_STALL[27],TCC_EA_RDREQ_IO_CREDIT_STALL[28],TCC_EA_RDREQ_GMI_CREDIT_STALL[28],TCC_EA_RDREQ_DRAM_CREDIT_STALL[28],TCC_EA_WRREQ_IO_CREDIT_STALL[28],TCC_EA_RDREQ_IO_CREDIT_STALL[29],TCC_EA_RDREQ_GMI_CREDIT_STALL[29],TCC_EA_RDREQ_DRAM_CREDIT_STALL[29],TCC_EA_WRREQ_IO_CREDIT_STALL[29],TCC_EA_RDREQ_IO_CREDIT_STALL[30],TCC_EA_RDREQ_GMI_CREDIT_STALL[30],TCC_EA_RDREQ_DRAM_CREDIT_STALL[30],TCC_EA_WRREQ_IO_CREDIT_STALL[30],TCC_EA_RDREQ_IO_CREDIT_STALL[31],TCC_EA_RDREQ_GMI_CREDIT_STALL[31],TCC_EA_RDREQ_DRAM_CREDIT_STALL[31],TCC_EA_WRREQ_IO_CREDIT_STALL[31],CPC_ME1_BUSY_FOR_PACKET_DECODE,CPC_CPC_UTCL2IU_STALL,SPI_RA_WAVE_SIMD_FULL_CSN,SPI_RA_VGPR_SIMD_FULL_CSN,GRBM_COUNT_12,GRBM_GUI_ACTIVE_12,wave_size_13,obj_13,SQ_INSTS_LDS,SQ_INST_LEVEL_LDS,SQ_ACCUM_PREV_HIRES_13,SQ_INSTS_SALU,SQ_INSTS_VSKIPPED,SQ_INSTS,SQ_INSTS_VALU,SQ_INSTS_VALU_ADD_F16,TA_BUFFER_READ_WAVEFRONTS_sum,TA_BUFFER_WRITE_WAVEFRONTS_sum,TD_SPI_STALL_sum,TD_LOAD_WAVEFRONT_sum,TCP_READ_TAGCONFLICT_STALL_CYCLES_sum,TCP_WRITE_TAGCONFLICT_STALL_CYCLES_sum,TCP_ATOMIC_TAGCONFLICT_STALL_CYCLES_sum,TCP_TA_TCP_STATE_READ_sum,TCC_REQ[0],TCC_READ[0],TCC_WRITE[0],TCC_ATOMIC[0],TCC_REQ[1],TCC_READ[1],TCC_WRITE[1],TCC_ATOMIC[1],TCC_REQ[2],TCC_READ[2],TCC_WRITE[2],TCC_ATOMIC[2],TCC_REQ[3],TCC_READ[3],TCC_WRITE[3],TCC_ATOMIC[3],TCC_REQ[4],TCC_READ[4],TCC_WRITE[4],TCC_ATOMIC[4],TCC_REQ[5],TCC_READ[5],TCC_WRITE[5],TCC_ATOMIC[5],TCC_REQ[6],TCC_READ[6],TCC_WRITE[6],TCC_ATOMIC[6],TCC_REQ[7],TCC_READ[7],TCC_WRITE[7],TCC_ATOMIC[7],TCC_REQ[8],TCC_READ[8],TCC_WRITE[8],TCC_ATOMIC[8],TCC_REQ[9],TCC_READ[9],TCC_WRITE[9],TCC_ATOMIC[9],TCC_REQ[10],TCC_READ[10],TCC_WRITE[10],TCC_ATOMIC[10],TCC_REQ[11],TCC_READ[11],TCC_WRITE[11],TCC_ATOMIC[11],TCC_REQ[12],TCC_READ[12],TCC_WRITE[12],TCC_ATOMIC[12],TCC_REQ[13],TCC_READ[13],TCC_WRITE[13],TCC_ATOMIC[13],TCC_REQ[14],TCC_READ[14],TCC_WRITE[14],TCC_ATOMIC[14],TCC_REQ[15],TCC_READ[15],TCC_WRITE[15],TCC_ATOMIC[15],TCC_REQ[16],TCC_READ[16],TCC_WRITE[16],TCC_ATOMIC[16],TCC_REQ[17],TCC_READ[17],TCC_WRITE[17],TCC_ATOMIC[17],TCC_REQ[18],TCC_READ[18],TCC_WRITE[18],TCC_ATOMIC[18],TCC_REQ[19],TCC_READ[19],TCC_WRITE[19],TCC_ATOMIC[19],TCC_REQ[20],TCC_READ[20],TCC_WRITE[20],TCC_ATOMIC[20],TCC_REQ[21],TCC_READ[21],TCC_WRITE[21],TCC_ATOMIC[21],TCC_REQ[22],TCC_READ[22],TCC_WRITE[22],TCC_ATOMIC[22],TCC_REQ[23],TCC_READ[23],TCC_WRITE[23],TCC_ATOMIC[23],TCC_REQ[24],TCC_READ[24],TCC_WRITE[24],TCC_ATOMIC[24],TCC_REQ[25],TCC_READ[25],TCC_WRITE[25],TCC_ATOMIC[25],TCC_REQ[26],TCC_READ[26],TCC_WRITE[26],TCC_ATOMIC[26],TCC_REQ[27],TCC_READ[27],TCC_WRITE[27],TCC_ATOMIC[27],TCC_REQ[28],TCC_READ[28],TCC_WRITE[28],TCC_ATOMIC[28],TCC_REQ[29],TCC_READ[29],TCC_WRITE[29],TCC_ATOMIC[29],TCC_REQ[30],TCC_READ[30],TCC_WRITE[30],TCC_ATOMIC[30],TCC_REQ[31],TCC_READ[31],TCC_WRITE[31],TCC_ATOMIC[31],CPC_CPC_TCIU_BUSY,CPC_CPC_TCIU_IDLE,CPF_CPF_TCIU_BUSY,CPF_CPF_TCIU_STALL,SPI_CSN_NUM_THREADGROUPS,SPI_CSN_WAVE,GRBM_SPI_BUSY,wave_size_14,obj_14,SQ_INSTS_VMEM,SQ_INST_LEVEL_VMEM,SQ_ACCUM_PREV_HIRES_14,SQ_INSTS_VALU_FMA_F32,SQ_INSTS_VALU_TRANS_F32,SQ_INSTS_VALU_ADD_F64,SQ_INSTS_VALU_MUL_F64,SQ_INSTS_VALU_FMA_F64,TA_BUFFER_COALESCED_READ_CYCLES_sum,TA_BUFFER_COALESCED_WRITE_CYCLES_sum,TD_COALESCABLE_WAVEFRONT_sum,TCP_TOTAL_ATOMIC_WITH_RET_sum,TCP_TOTAL_ATOMIC_WITHOUT_RET_sum,TCP_TOTAL_WRITEBACK_INVALIDATES_sum,TCP_TOTAL_CACHE_ACCESSES_sum,TCC_EA_ATOMIC[0],TCC_EA_RDREQ_LEVEL[0],TCC_EA_WRREQ_LEVEL[0],TCC_EA_ATOMIC_LEVEL[0],TCC_EA_ATOMIC[1],TCC_EA_RDREQ_LEVEL[1],TCC_EA_WRREQ_LEVEL[1],TCC_EA_ATOMIC_LEVEL[1],TCC_EA_ATOMIC[2],TCC_EA_RDREQ_LEVEL[2],TCC_EA_WRREQ_LEVEL[2],TCC_EA_ATOMIC_LEVEL[2],TCC_EA_ATOMIC[3],TCC_EA_RDREQ_LEVEL[3],TCC_EA_WRREQ_LEVEL[3],TCC_EA_ATOMIC_LEVEL[3],TCC_EA_ATOMIC[4],TCC_EA_RDREQ_LEVEL[4],TCC_EA_WRREQ_LEVEL[4],TCC_EA_ATOMIC_LEVEL[4],TCC_EA_ATOMIC[5],TCC_EA_RDREQ_LEVEL[5],TCC_EA_WRREQ_LEVEL[5],TCC_EA_ATOMIC_LEVEL[5],TCC_EA_ATOMIC[6],TCC_EA_RDREQ_LEVEL[6],TCC_EA_WRREQ_LEVEL[6],TCC_EA_ATOMIC_LEVEL[6],TCC_EA_ATOMIC[7],TCC_EA_RDREQ_LEVEL[7],TCC_EA_WRREQ_LEVEL[7],TCC_EA_ATOMIC_LEVEL[7],TCC_EA_ATOMIC[8],TCC_EA_RDREQ_LEVEL[8],TCC_EA_WRREQ_LEVEL[8],TCC_EA_ATOMIC_LEVEL[8],TCC_EA_ATOMIC[9],TCC_EA_RDREQ_LEVEL[9],TCC_EA_WRREQ_LEVEL[9],TCC_EA_ATOMIC_LEVEL[9],TCC_EA_ATOMIC[10],TCC_EA_RDREQ_LEVEL[10],TCC_EA_WRREQ_LEVEL[10],TCC_EA_ATOMIC_LEVEL[10],TCC_EA_ATOMIC[11],TCC_EA_RDREQ_LEVEL[11],TCC_EA_WRREQ_LEVEL[11],TCC_EA_ATOMIC_LEVEL[11],TCC_EA_ATOMIC[12],TCC_EA_RDREQ_LEVEL[12],TCC_EA_WRREQ_LEVEL[12],TCC_EA_ATOMIC_LEVEL[12],TCC_EA_ATOMIC[13],TCC_EA_RDREQ_LEVEL[13],TCC_EA_WRREQ_LEVEL[13],TCC_EA_ATOMIC_LEVEL[13],TCC_EA_ATOMIC[14],TCC_EA_RDREQ_LEVEL[14],TCC_EA_WRREQ_LEVEL[14],TCC_EA_ATOMIC_LEVEL[14],TCC_EA_ATOMIC[15],TCC_EA_RDREQ_LEVEL[15],TCC_EA_WRREQ_LEVEL[15],TCC_EA_ATOMIC_LEVEL[15],TCC_EA_ATOMIC[16],TCC_EA_RDREQ_LEVEL[16],TCC_EA_WRREQ_LEVEL[16],TCC_EA_ATOMIC_LEVEL[16],TCC_EA_ATOMIC[17],TCC_EA_RDREQ_LEVEL[17],TCC_EA_WRREQ_LEVEL[17],TCC_EA_ATOMIC_LEVEL[17],TCC_EA_ATOMIC[18],TCC_EA_RDREQ_LEVEL[18],TCC_EA_WRREQ_LEVEL[18],TCC_EA_ATOMIC_LEVEL[18],TCC_EA_ATOMIC[19],TCC_EA_RDREQ_LEVEL[19],TCC_EA_WRREQ_LEVEL[19],TCC_EA_ATOMIC_LEVEL[19],TCC_EA_ATOMIC[20],TCC_EA_RDREQ_LEVEL[20],TCC_EA_WRREQ_LEVEL[20],TCC_EA_ATOMIC_LEVEL[20],TCC_EA_ATOMIC[21],TCC_EA_RDREQ_LEVEL[21],TCC_EA_WRREQ_LEVEL[21],TCC_EA_ATOMIC_LEVEL[21],TCC_EA_ATOMIC[22],TCC_EA_RDREQ_LEVEL[22],TCC_EA_WRREQ_LEVEL[22],TCC_EA_ATOMIC_LEVEL[22],TCC_EA_ATOMIC[23],TCC_EA_RDREQ_LEVEL[23],TCC_EA_WRREQ_LEVEL[23],TCC_EA_ATOMIC_LEVEL[23],TCC_EA_ATOMIC[24],TCC_EA_RDREQ_LEVEL[24],TCC_EA_WRREQ_LEVEL[24],TCC_EA_ATOMIC_LEVEL[24],TCC_EA_ATOMIC[25],TCC_EA_RDREQ_LEVEL[25],TCC_EA_WRREQ_LEVEL[25],TCC_EA_ATOMIC_LEVEL[25],TCC_EA_ATOMIC[26],TCC_EA_RDREQ_LEVEL[26],TCC_EA_WRREQ_LEVEL[26],TCC_EA_ATOMIC_LEVEL[26],TCC_EA_ATOMIC[27],TCC_EA_RDREQ_LEVEL[27],TCC_EA_WRREQ_LEVEL[27],TCC_EA_ATOMIC_LEVEL[27],TCC_EA_ATOMIC[28],TCC_EA_RDREQ_LEVEL[28],TCC_EA_WRREQ_LEVEL[28],TCC_EA_ATOMIC_LEVEL[28],TCC_EA_ATOMIC[29],TCC_EA_RDREQ_LEVEL[29],TCC_EA_WRREQ_LEVEL[29],TCC_EA_ATOMIC_LEVEL[29],TCC_EA_ATOMIC[30],TCC_EA_RDREQ_LEVEL[30],TCC_EA_WRREQ_LEVEL[30],TCC_EA_ATOMIC_LEVEL[30],TCC_EA_ATOMIC[31],TCC_EA_RDREQ_LEVEL[31],TCC_EA_WRREQ_LEVEL[31],TCC_EA_ATOMIC_LEVEL[31],CPC_CPC_UTCL2IU_BUSY,CPC_CPC_UTCL2IU_IDLE,CPF_CMP_UTCL1_STALL_ON_TRANSLATION,SPI_RA_RES_STALL_CSN,SPI_RA_TMP_STALL_CSN,Start_Timestamp,End_Timestamp +0,__amd_rocclr_fillBufferAligned.kd,2,256,256,0,0,12,4,32,64,0x7fb019e7d800,0,0,0,0,0,0,0,0,131072.0,131072.0,0.0,971473.0,33551804.0,29353105.0,0.0,0.0,64,0x7f099c4bd800,0.0,0.0,64,0x7fe35344d800,0,0,0,0,0,0,0,0,16384.0,0.0,131072.0,131072.0,0.0,971473.0,33551804.0,29353105.0,0.0,0.0,16384,0,64,0x7fa93ecdd800,0,0,0,0,0,0,0,0,16384.0,0.0,131072.0,131072.0,0.0,971473.0,33551804.0,29353105.0,0.0,0.0,16384,0,64,0x7fe7b64bd800,0,0,0,0,0,0,0,0,131072.0,131072.0,0.0,971473.0,33551804.0,29353105.0,0.0,0.0,16384,0,64,0x7f501f115800,46,26,5,15,2,0,4,16,930.0,0.0,0.0,0.0,64,0x7fbc510b5800,0,0,0,0,0,0,0,0,16384.0,0.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,1213216,992963,0,0,0,0,1175854,1033860,0,1520989,1091404,0,1468827,1047492,0,0,0,0,1239120,1082184,0,1072038,981313,0,1024755,978955,0,1114067,1067349,0,1125538,973712,0,1092006,1136315,0,1488950,1046986,0,1420108,1188281,0,1087014,957831,0,1079591,979515,0,1200842,1084335,0,0,0,0,1085995,1022303,0,1295858,1104205,0,1049908,1014016,0,1533784,1320993,0,1166083,1076118,0,1122956,992991,0,998575,952668,0,1169185,1082850,0,1178114,999491,0,1062056,1000641,0,1153969,999775,0,1310176,1179285,0,0.0,14941,16384,0,64,0x7f817b4a5800,46,26,5,15,2,0,0.0,0.0,0.0,5812.0,64,0x7f410dc0d800,0,0,0,0,0,0,0,0,33551804.0,29353105.0,0.0,0.0,64,0x7ff353b15800,0,0,0,0,0,0,0,0,33551804.0,29353105.0,0.0,0.0,64,0x7f96ea49d800,0,0,0,0,0,0,0,0,16384.0,0.0,0.0,16384.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,0,25626,16384,0,64,0x7fc39b83d800,0,0,0,0,0,0,0,0,16384.0,0.0,0.0,16384.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,0,25626,16384,0,15492,15492,64,0x7f0b7a335800,0,0,0,0,0,0,0,0,16384.0,0.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,16384,0,15256,15256,64,0x7f092c135800,0,0,0,0,0,0,0,0,16384.0,0.0,0.0,16384.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,0,25626,16384,0,7832,64,0x7fa983c4d800,0,0,0,0,0,0,0,0,16384.0,0.0,0.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,0,16384,0,9595697581667,9595697588067 +1,"global_write(int*, int) [clone .kd]",2,1,1,0,0,4,4,16,64,0x7fb019e4d200,0,0,0,0,0,0,0,0,131072.0,131072.0,0.0,971473.0,33551804.0,29353105.0,0.0,0.0,64,0x7f099c49d200,0.0,0.0,64,0x7fe353429200,0,0,0,0,0,0,0,0,16384.0,0.0,131072.0,131072.0,0.0,971473.0,33551804.0,29353105.0,0.0,0.0,16384,0,64,0x7fa93eca5200,0,0,0,0,0,0,0,0,16384.0,0.0,131072.0,131072.0,0.0,971473.0,33551804.0,29353105.0,0.0,0.0,16384,0,64,0x7fe7b648d200,0,0,0,0,0,0,0,0,131072.0,131072.0,0.0,971473.0,33551804.0,29353105.0,0.0,0.0,16384,0,64,0x7f501efe1200,46,26,5,15,2,0,4,16,930.0,0.0,0.0,0.0,64,0x7fbc51079200,0,0,0,0,0,0,0,0,16384.0,0.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,1213216,992963,0,0,0,0,1175854,1033860,0,1520989,1091404,0,1468827,1047492,0,0,0,0,1239120,1082184,0,1072038,981313,0,1024755,978955,0,1114067,1067349,0,1125538,973712,0,1092006,1136315,0,1488950,1046986,0,1420108,1188281,0,1087014,957831,0,1079591,979515,0,1200842,1084335,0,0,0,0,1085995,1022303,0,1295858,1104205,0,1049908,1014016,0,1533784,1320993,0,1166083,1076118,0,1122956,992991,0,998575,952668,0,1169185,1082850,0,1178114,999491,0,1062056,1000641,0,1153969,999775,0,1310176,1179285,0,0.0,14941,16384,0,64,0x7f817b47d200,46,26,5,15,2,0,0.0,0.0,0.0,5812.0,64,0x7f410c9e1200,0,0,0,0,0,0,0,0,33551804.0,29353105.0,0.0,0.0,64,0x7ff353af1200,0,0,0,0,0,0,0,0,33551804.0,29353105.0,0.0,0.0,64,0x7f96ea465200,0,0,0,0,0,0,0,0,16384.0,0.0,0.0,16384.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,0,25626,16384,0,64,0x7fc39b805200,0,0,0,0,0,0,0,0,16384.0,0.0,0.0,16384.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,0,25626,16384,0,14214,14214,64,0x7f0b7a319200,0,0,0,0,0,0,0,0,16384.0,0.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,16384,0,14215,14215,64,0x7f092c0f9200,0,0,0,0,0,0,0,0,16384.0,0.0,0.0,16384.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,0,25626,16384,0,4807,64,0x7fa983c15200,0,0,0,0,0,0,0,0,16384.0,0.0,0.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,0,16384,0,9595697981345,9595697983585 +2,"generic_write(int*, int, int) [clone .kd]",2,1,1,4096,0,4,4,48,64,0x7fb019e4d240,0,0,0,0,0,0,0,0,131072.0,131072.0,0.0,971473.0,33551804.0,29353105.0,0.0,0.0,64,0x7f099c49d240,0.0,0.0,64,0x7fe353429240,0,0,0,0,0,0,0,0,16384.0,0.0,131072.0,131072.0,0.0,971473.0,33551804.0,29353105.0,0.0,0.0,16384,0,64,0x7fa93eca5240,0,0,0,0,0,0,0,0,16384.0,0.0,131072.0,131072.0,0.0,971473.0,33551804.0,29353105.0,0.0,0.0,16384,0,64,0x7fe7b648d240,0,0,0,0,0,0,0,0,131072.0,131072.0,0.0,971473.0,33551804.0,29353105.0,0.0,0.0,16384,0,64,0x7f501efe1240,46,26,5,15,2,0,4,16,930.0,0.0,0.0,0.0,64,0x7fbc51079240,0,0,0,0,0,0,0,0,16384.0,0.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,1213216,992963,0,0,0,0,1175854,1033860,0,1520989,1091404,0,1468827,1047492,0,0,0,0,1239120,1082184,0,1072038,981313,0,1024755,978955,0,1114067,1067349,0,1125538,973712,0,1092006,1136315,0,1488950,1046986,0,1420108,1188281,0,1087014,957831,0,1079591,979515,0,1200842,1084335,0,0,0,0,1085995,1022303,0,1295858,1104205,0,1049908,1014016,0,1533784,1320993,0,1166083,1076118,0,1122956,992991,0,998575,952668,0,1169185,1082850,0,1178114,999491,0,1062056,1000641,0,1153969,999775,0,1310176,1179285,0,0.0,14941,16384,0,64,0x7f817b47d240,46,26,5,15,2,0,0.0,0.0,0.0,5812.0,64,0x7f410c9e1240,0,0,0,0,0,0,0,0,33551804.0,29353105.0,0.0,0.0,64,0x7ff353af1240,0,0,0,0,0,0,0,0,33551804.0,29353105.0,0.0,0.0,64,0x7f96ea465240,0,0,0,0,0,0,0,0,16384.0,0.0,0.0,16384.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,0,25626,16384,0,64,0x7fc39b805240,0,0,0,0,0,0,0,0,16384.0,0.0,0.0,16384.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,0,25626,16384,0,16792,16792,64,0x7f0b7a319240,0,0,0,0,0,0,0,0,16384.0,0.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,16384,0,16303,16303,64,0x7f092c0f9240,0,0,0,0,0,0,0,0,16384.0,0.0,0.0,16384.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,0,25626,16384,0,6147,64,0x7fa983c15240,0,0,0,0,0,0,0,0,16384.0,0.0,0.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,0,16384,0,9595697989025,9595697991265 +3,"global_read(int*, int) [clone .kd]",2,1,1,0,0,4,4,16,64,0x7fb019e4d280,0,0,0,0,0,0,0,0,131072.0,131072.0,0.0,971473.0,33551804.0,29353105.0,0.0,0.0,64,0x7f099c49d280,0.0,0.0,64,0x7fe353429280,0,0,0,0,0,0,0,0,16384.0,0.0,131072.0,131072.0,0.0,971473.0,33551804.0,29353105.0,0.0,0.0,16384,0,64,0x7fa93eca5280,0,0,0,0,0,0,0,0,16384.0,0.0,131072.0,131072.0,0.0,971473.0,33551804.0,29353105.0,0.0,0.0,16384,0,64,0x7fe7b648d280,0,0,0,0,0,0,0,0,131072.0,131072.0,0.0,971473.0,33551804.0,29353105.0,0.0,0.0,16384,0,64,0x7f501efe1280,46,26,5,15,2,0,4,16,930.0,0.0,0.0,0.0,64,0x7fbc51079280,0,0,0,0,0,0,0,0,16384.0,0.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,1213216,992963,0,0,0,0,1175854,1033860,0,1520989,1091404,0,1468827,1047492,0,0,0,0,1239120,1082184,0,1072038,981313,0,1024755,978955,0,1114067,1067349,0,1125538,973712,0,1092006,1136315,0,1488950,1046986,0,1420108,1188281,0,1087014,957831,0,1079591,979515,0,1200842,1084335,0,0,0,0,1085995,1022303,0,1295858,1104205,0,1049908,1014016,0,1533784,1320993,0,1166083,1076118,0,1122956,992991,0,998575,952668,0,1169185,1082850,0,1178114,999491,0,1062056,1000641,0,1153969,999775,0,1310176,1179285,0,0.0,14941,16384,0,64,0x7f817b47d280,46,26,5,15,2,0,0.0,0.0,0.0,5812.0,64,0x7f410c9e1280,0,0,0,0,0,0,0,0,33551804.0,29353105.0,0.0,0.0,64,0x7ff353af1280,0,0,0,0,0,0,0,0,33551804.0,29353105.0,0.0,0.0,64,0x7f96ea465280,0,0,0,0,0,0,0,0,16384.0,0.0,0.0,16384.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,0,25626,16384,0,64,0x7fc39b805280,0,0,0,0,0,0,0,0,16384.0,0.0,0.0,16384.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,0,25626,16384,0,17066,17066,64,0x7f0b7a319280,0,0,0,0,0,0,0,0,16384.0,0.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,16384,0,17633,17633,64,0x7f092c0f9280,0,0,0,0,0,0,0,0,16384.0,0.0,0.0,16384.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,0,25626,16384,0,6439,64,0x7fa983c15280,0,0,0,0,0,0,0,0,16384.0,0.0,0.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,0,16384,0,9595698033025,9595698035745 +4,"generic_read(int*, int, int) [clone .kd]",2,1,1,0,0,4,4,16,64,0x7fb019e4d2c0,0,0,0,0,0,0,0,0,131072.0,131072.0,0.0,971473.0,33551804.0,29353105.0,0.0,0.0,64,0x7f099c49d2c0,0.0,0.0,64,0x7fe3534292c0,0,0,0,0,0,0,0,0,16384.0,0.0,131072.0,131072.0,0.0,971473.0,33551804.0,29353105.0,0.0,0.0,16384,0,64,0x7fa93eca52c0,0,0,0,0,0,0,0,0,16384.0,0.0,131072.0,131072.0,0.0,971473.0,33551804.0,29353105.0,0.0,0.0,16384,0,64,0x7fe7b648d2c0,0,0,0,0,0,0,0,0,131072.0,131072.0,0.0,971473.0,33551804.0,29353105.0,0.0,0.0,16384,0,64,0x7f501efe12c0,46,26,5,15,2,0,4,16,930.0,0.0,0.0,0.0,64,0x7fbc510792c0,0,0,0,0,0,0,0,0,16384.0,0.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,1213216,992963,0,0,0,0,1175854,1033860,0,1520989,1091404,0,1468827,1047492,0,0,0,0,1239120,1082184,0,1072038,981313,0,1024755,978955,0,1114067,1067349,0,1125538,973712,0,1092006,1136315,0,1488950,1046986,0,1420108,1188281,0,1087014,957831,0,1079591,979515,0,1200842,1084335,0,0,0,0,1085995,1022303,0,1295858,1104205,0,1049908,1014016,0,1533784,1320993,0,1166083,1076118,0,1122956,992991,0,998575,952668,0,1169185,1082850,0,1178114,999491,0,1062056,1000641,0,1153969,999775,0,1310176,1179285,0,0.0,14941,16384,0,64,0x7f817b47d2c0,46,26,5,15,2,0,0.0,0.0,0.0,5812.0,64,0x7f410c9e12c0,0,0,0,0,0,0,0,0,33551804.0,29353105.0,0.0,0.0,64,0x7ff353af12c0,0,0,0,0,0,0,0,0,33551804.0,29353105.0,0.0,0.0,64,0x7f96ea4652c0,0,0,0,0,0,0,0,0,16384.0,0.0,0.0,16384.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,0,25626,16384,0,64,0x7fc39b8052c0,0,0,0,0,0,0,0,0,16384.0,0.0,0.0,16384.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,0,25626,16384,0,16835,16835,64,0x7f0b7a3192c0,0,0,0,0,0,0,0,0,16384.0,0.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,16384,0,16725,16725,64,0x7f092c0f92c0,0,0,0,0,0,0,0,0,16384.0,0.0,0.0,16384.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,0,25626,16384,0,6821,64,0x7fa983c152c0,0,0,0,0,0,0,0,0,16384.0,0.0,0.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,0,16384,0,9595698039905,9595698042305 +5,"global_atomic(int*, int) [clone .kd]",2,1,1,0,0,4,4,16,64,0x7fb019e4d300,0,0,0,0,0,0,0,0,131072.0,131072.0,0.0,971473.0,33551804.0,29353105.0,0.0,0.0,64,0x7f099c49d300,0.0,0.0,64,0x7fe353429300,0,0,0,0,0,0,0,0,16384.0,0.0,131072.0,131072.0,0.0,971473.0,33551804.0,29353105.0,0.0,0.0,16384,0,64,0x7fa93eca5300,0,0,0,0,0,0,0,0,16384.0,0.0,131072.0,131072.0,0.0,971473.0,33551804.0,29353105.0,0.0,0.0,16384,0,64,0x7fe7b648d300,0,0,0,0,0,0,0,0,131072.0,131072.0,0.0,971473.0,33551804.0,29353105.0,0.0,0.0,16384,0,64,0x7f501efe1300,46,26,5,15,2,0,4,16,930.0,0.0,0.0,0.0,64,0x7fbc51079300,0,0,0,0,0,0,0,0,16384.0,0.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,1213216,992963,0,0,0,0,1175854,1033860,0,1520989,1091404,0,1468827,1047492,0,0,0,0,1239120,1082184,0,1072038,981313,0,1024755,978955,0,1114067,1067349,0,1125538,973712,0,1092006,1136315,0,1488950,1046986,0,1420108,1188281,0,1087014,957831,0,1079591,979515,0,1200842,1084335,0,0,0,0,1085995,1022303,0,1295858,1104205,0,1049908,1014016,0,1533784,1320993,0,1166083,1076118,0,1122956,992991,0,998575,952668,0,1169185,1082850,0,1178114,999491,0,1062056,1000641,0,1153969,999775,0,1310176,1179285,0,0.0,14941,16384,0,64,0x7f817b47d300,46,26,5,15,2,0,0.0,0.0,0.0,5812.0,64,0x7f410c9e1300,0,0,0,0,0,0,0,0,33551804.0,29353105.0,0.0,0.0,64,0x7ff353af1300,0,0,0,0,0,0,0,0,33551804.0,29353105.0,0.0,0.0,64,0x7f96ea465300,0,0,0,0,0,0,0,0,16384.0,0.0,0.0,16384.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,0,25626,16384,0,64,0x7fc39b805300,0,0,0,0,0,0,0,0,16384.0,0.0,0.0,16384.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,0,25626,16384,0,16490,16490,64,0x7f0b7a319300,0,0,0,0,0,0,0,0,16384.0,0.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,16384,0,17452,17452,64,0x7f092c0f9300,0,0,0,0,0,0,0,0,16384.0,0.0,0.0,16384.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,0,25626,16384,0,6708,64,0x7fa983c15300,0,0,0,0,0,0,0,0,16384.0,0.0,0.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,0,16384,0,9595698072385,9595698076225 +6,"generic_atomic(int*, int, int) [clone .kd]",2,1,1,4096,0,4,4,48,64,0x7fb019e4d340,0,0,0,0,0,0,0,0,131072.0,131072.0,0.0,971473.0,33551804.0,29353105.0,0.0,0.0,64,0x7f099c49d340,0.0,0.0,64,0x7fe353429340,0,0,0,0,0,0,0,0,16384.0,0.0,131072.0,131072.0,0.0,971473.0,33551804.0,29353105.0,0.0,0.0,16384,0,64,0x7fa93eca5340,0,0,0,0,0,0,0,0,16384.0,0.0,131072.0,131072.0,0.0,971473.0,33551804.0,29353105.0,0.0,0.0,16384,0,64,0x7fe7b648d340,0,0,0,0,0,0,0,0,131072.0,131072.0,0.0,971473.0,33551804.0,29353105.0,0.0,0.0,16384,0,64,0x7f501efe1340,46,26,5,15,2,0,4,16,930.0,0.0,0.0,0.0,64,0x7fbc51079340,0,0,0,0,0,0,0,0,16384.0,0.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,1213216,992963,0,0,0,0,1175854,1033860,0,1520989,1091404,0,1468827,1047492,0,0,0,0,1239120,1082184,0,1072038,981313,0,1024755,978955,0,1114067,1067349,0,1125538,973712,0,1092006,1136315,0,1488950,1046986,0,1420108,1188281,0,1087014,957831,0,1079591,979515,0,1200842,1084335,0,0,0,0,1085995,1022303,0,1295858,1104205,0,1049908,1014016,0,1533784,1320993,0,1166083,1076118,0,1122956,992991,0,998575,952668,0,1169185,1082850,0,1178114,999491,0,1062056,1000641,0,1153969,999775,0,1310176,1179285,0,0.0,14941,16384,0,64,0x7f817b47d340,46,26,5,15,2,0,0.0,0.0,0.0,5812.0,64,0x7f410c9e1340,0,0,0,0,0,0,0,0,33551804.0,29353105.0,0.0,0.0,64,0x7ff353af1340,0,0,0,0,0,0,0,0,33551804.0,29353105.0,0.0,0.0,64,0x7f96ea465340,0,0,0,0,0,0,0,0,16384.0,0.0,0.0,16384.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,0,25626,16384,0,64,0x7fc39b805340,0,0,0,0,0,0,0,0,16384.0,0.0,0.0,16384.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,0,25626,16384,0,17235,17235,64,0x7f0b7a319340,0,0,0,0,0,0,0,0,16384.0,0.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,16384,0,17418,17418,64,0x7f092c0f9340,0,0,0,0,0,0,0,0,16384.0,0.0,0.0,16384.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,0,25626,16384,0,7838,64,0x7fa983c15340,0,0,0,0,0,0,0,0,16384.0,0.0,0.0,131072.0,131072.0,0.0,971473.0,1102230,965274,0,0,0,0,0,0,1213216,992963,0,0,0,0,0,0,1175854,1033860,0,0,1520989,1091404,0,0,1468827,1047492,0,0,0,0,0,0,1239120,1082184,0,0,1072038,981313,0,0,1024755,978955,0,0,1114067,1067349,0,0,1125538,973712,0,0,1092006,1136315,0,0,1488950,1046986,0,0,1420108,1188281,0,0,1087014,957831,0,0,1079591,979515,0,0,1200842,1084335,0,0,0,0,0,0,1085995,1022303,0,0,1295858,1104205,0,0,1049908,1014016,0,0,1533784,1320993,0,0,1166083,1076118,0,0,1122956,992991,0,0,998575,952668,0,0,1169185,1082850,0,0,1178114,999491,0,0,1062056,1000641,0,0,1153969,999775,0,0,1310176,1179285,0,0,14941,11278,0,16384,0,9595698076225,9595698080065 diff --git a/tests/workloads/multikernel/MI200/roofline.csv b/tests/workloads/multikernel/MI200/roofline.csv new file mode 100644 index 000000000..6ec250343 --- /dev/null +++ b/tests/workloads/multikernel/MI200/roofline.csv @@ -0,0 +1,4 @@ +device,HBMBw,HBMBwLow,hbmBwHigh,L2Bw,L2BwLow,L2BwHigh,L1Bw,L1BwLow,L1BwHigh,LDSBw,LDSBwLow,LDSBwHigh,FP32Flops,FP32FlopsLow,FP32FlopsHigh,FP64Flops,FP64FlopsLow,FP64FlopsHigh,MFMABF16Flops,MFMABF16FlopsLow,MFMABF16FlopsHigh,MFMAF16Flops,MFMAF16FlopsLow,MFMAF16FlopsHigh,MFMAF32Flops,MFMAF32FlopsLow,MFMAF32FlopsHigh,MFMAF64Flops,MFMAF64FlopsLow,MFMAF64FlopsHigh,MFMAI8Ops,MFMAFI8OpsLow,MFMAI8OpsHigh +0,1392.4641,1391.8217,1393.1066,5012.8921,4999.646,5026.1382,9233.5303,9232.7051,9234.3555,21014.016,21012.709,21015.322,20999.16,20998.666,20999.654,20187.006,19967.416,20406.596,170734.45,170729.66,170739.25,165067.44,165061.97,165072.91,41480.547,41479.074,41482.02,41303.828,40844.742,41762.914,166839.7,166833.48,166845.92 +1,1391.9521,1391.3705,1392.5338,5005.9937,4997.5942,5014.3931,9197.5605,9196.7617,9198.3594,20984.785,20983.42,20986.15,20944.287,20943.701,20944.873,20222.557,20221.738,20223.375,170379.27,170372.45,170386.08,164480.19,164473.75,164486.62,41338.699,41336.965,41340.434,41397.539,41395.738,41399.34,166265.69,166259,166272.38 +2,1383.8199,1367.1714,1400.4685,5018.9521,5012.7114,5025.1929,9215.9326,9215.2646,9216.6006,21009.275,21007.996,21010.555,20973.949,20973.555,20974.344,20256.393,20255.896,20256.889,170535.72,170531.75,170539.69,164753.2,164748.77,164757.64,41403.738,41402.945,41404.531,41465.727,41464.988,41466.465,166533.73,166531.06,166536.41 diff --git a/tests/workloads/multikernel/MI200/sysinfo.csv b/tests/workloads/multikernel/MI200/sysinfo.csv new file mode 100644 index 000000000..5ac906c7d --- /dev/null +++ b/tests/workloads/multikernel/MI200/sysinfo.csv @@ -0,0 +1,2 @@ +workload_name,command,ip_blocks,timestamp,version,hostname,cpu_model,sbios,linux_distro,linux_kernel_version,amd_gpu_kernel_version,cpu_memory,gpu_memory,rocm_version,vbios,compute_partition,memory_partition,gpu_model,gpu_arch,gpu_l1,gpu_l2,cu_per_gpu,simd_per_cu,se_per_gpu,wave_size,workgroup_max_size,chip_id,max_waves_per_cu,max_sclk,max_mclk,cur_sclk,cur_mclk,total_l2_chan,lds_banks_per_cu,sqc_per_gpu,pipes_per_gpu,hbm_bw,num_xcd +multikernel,./sample/vmem,SQ|LDS|SQC|TA|TD|TCP|TCC|SPI|CPC|CPF|roofline,Tue 22 Oct 2024 03:45:54 PM (CDT),2,dell-rack-10,AMD EPYC 7313 16-Core Processor,Dell Inc.2.2.5,Ubuntu 20.04.6 LTS,5.15.0-122-generic,,527949684,,6.2.1-99,113-D67301-064D,NA,NA,MI200,gfx90a,16,8192,104,4,8,64,1024,29711,32,1700,1600,1700,1600,32,32,56,4,1638.4,1 diff --git a/tests/workloads/multikernel/MI200/timestamps.csv b/tests/workloads/multikernel/MI200/timestamps.csv new file mode 100644 index 000000000..23782c1d1 --- /dev/null +++ b/tests/workloads/multikernel/MI200/timestamps.csv @@ -0,0 +1,8 @@ +Dispatch_ID,Kernel_Name,GPU_ID,queue-id,queue-index,pid,tid,Grid_Size,Workgroup_Size,LDS_Per_Workgroup,Scratch_Per_Workitem,Arch_VGPR,Accum_VGPR,SGPR,wave_size,sig,obj,DispatchNs,Start_Timestamp,End_Timestamp,CompleteNs +0,__amd_rocclr_fillBufferAligned.kd,2,0,0,791827,791827,256,256,0,0,12,4,32,64,0x0,0x7fd72f815800,9595697470571,9595697581667,9595697588067,9595697604693 +1,"global_write(int*, int) [clone .kd]",2,0,1,791827,791827,1,1,0,0,4,4,16,64,0x0,0x7fd72e4d9200,9595697964779,9595697981345,9595697983585,9595697985168 +2,"generic_write(int*, int, int) [clone .kd]",2,0,2,791827,791827,1,1,4096,0,4,4,48,64,0x0,0x7fd72e4d9240,9595697979166,9595697989025,9595697991265,9595698006628 +3,"global_read(int*, int) [clone .kd]",2,0,3,791827,791827,1,1,0,0,4,4,16,64,0x0,0x7fd72e4d9280,9595697986791,9595698033025,9595698035745,9595698036745 +4,"generic_read(int*, int, int) [clone .kd]",2,0,4,791827,791827,1,1,0,0,4,4,16,64,0x0,0x7fd72e4d92c0,9595698029551,9595698039905,9595698042305,9595698049348 +5,"global_atomic(int*, int) [clone .kd]",2,0,5,791827,791827,1,1,0,0,4,4,16,64,0x0,0x7fd72e4d9300,9595698036745,9595698072385,9595698076225,9595698076890 +6,"generic_atomic(int*, int, int) [clone .kd]",2,0,6,791827,791827,1,1,4096,0,4,4,48,64,0x0,0x7fd72e4d9340,9595698067583,9595698076225,9595698080065,9595698086618